1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a captured statement \p CS
  /// (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement of their own
  /// (used by inlined regions, which reuse the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task scheduling point; a no-op for every region kind except
  /// untied task regions (see CGOpenMPTaskOutlinedRegionInfo).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Return true if the region may be exited via a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: every captured-statement info created for an OpenMP
  /// region carries capture region kind CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region, see CGOpenMPRegionKind.
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code of the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that created this region.
  OpenMPDirectiveKind Kind;
  /// True if the region may be cancelled.
  bool HasCancel;
};
96 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable or parameter carrying the global thread id
  /// inside the outlined function; must not be null.
  /// \param HelperName Name to use for the generated outlined helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: matches only region infos of kind ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated outlined helper function.
  StringRef HelperName;
};
129 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the resume machinery required by 'untied'
  /// tasks: the outlined function is split into parts and a switch over a
  /// persistent part-id variable selects where to resume when the task
  /// function is re-entered.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: the constructor takes 'Tied').
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Codegen sequence run at each task scheduling point (before the
    /// current part returns).
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over PartIDVar; one case is added per resume point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Part ids with no matching case fall through to ".untied.done."
        // and simply return from the task function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task scheduling point: persist the id of the next part,
    /// run UntiedCodeGen, return from the current part, and register the
    /// continuation block as the next case of the resume switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the next case number so a later invocation of the outlined
        // function resumes at the continuation block emitted below.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate emission of a task scheduling point to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI: matches only region infos of kind TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: matches only region infos of kind TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, supplied by the client.
  StringRef HelperName;
};
330 
/// Placeholder region-body callback for region infos that must never emit
/// code (used by CGOpenMPInnerExprInfo, which only privatizes captures).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are usable directly; only non-local variables
      // need to be redirected through the privatization scope.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Privatize VD to the address produced by emitting a reference to it
      // in the current context.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable. Only fields found by
  /// the enclosing inlined-region chain are reported.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body. Expression regions have no body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Expression regions are intentionally never discoverable via classof.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
393 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of \p CGF, restored on destruction.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture of \p CGF, restored on destruction.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block-literal info of \p CGF, restored on destruction.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash and clear the lambda/block capture state so code emitted inside
    // the inlined region does not use the enclosing captures directly.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
430 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Deliberately shares the value of
  /// OMP_IDENT_BARRIER_IMPL to match the runtime's kmp.h.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
459 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// Combined as a bitmask (see LLVM_MARK_AS_BITMASK_ENUM below).
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
485 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Indexes of fields for structure ident_t (must follow the field order of
/// the struct shown above).
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
526 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions. Each ordered enumerator below is
  /// its unordered counterpart plus 32.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule kind (maps to static).
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
558 
/// IDs of the OpenMP runtime library (libomp / libomptarget) entry points
/// that this class may emit calls to. Each enumerator documents the C
/// signature of the corresponding runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register(ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
756 
757 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
758 /// region.
759 class CleanupTy final : public EHScopeStack::Cleanup {
760   PrePostActionTy *Action;
761 
762 public:
763   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
764   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
765     if (!CGF.HaveInsertPoint())
766       return;
767     Action->Exit(CGF);
768   }
769 };
770 
771 } // anonymous namespace
772 
/// Invoke the stored region-codegen callback inside its own cleanups scope,
/// wiring in the optional pre/post action.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Register the action's Exit() as an EH-aware cleanup *before* running
    // the callback, so it fires even if the region emission unwinds.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No externally supplied action: pass a default (no-op) one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
783 
784 /// Check if the combiner is a call to UDR combiner and if it is so return the
785 /// UDR decl used for reduction.
786 static const OMPDeclareReductionDecl *
787 getReductionInit(const Expr *ReductionOp) {
788   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
789     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
790       if (const auto *DRE =
791               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
792         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
793           return DRD;
794   return nullptr;
795 }
796 
/// Initialize \p Private from \p Original for a reduction item whose
/// reduction operation refers to the user-defined reduction \p DRD. If the
/// UDR declares an explicit initializer, emit a call to the emitted
/// initializer function with the operands redirected to \p Private and
/// \p Original; otherwise zero-initialize \p Private for type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // <combiner, initializer> function pair emitted for this UDR.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // InitOp must be a call whose callee is an OpaqueValueExpr and whose two
    // arguments are address-of expressions over DeclRefExprs; dig out the
    // referenced variables.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect those variables to the actual private/original storage for
    // the duration of the call emission.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the emitted initializer function for the opaque callee.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private-linkage zero constant
    // of type Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant using the evaluation strategy matching Ty's kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
848 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit True if \p Init is a UDR reduction op to
/// be emitted via emitInitWithReductionInitializer; false if it is a plain
/// initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration used for initialization, if any.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // Give the source the same element representation as the destination (the
  // destination cast below is a no-op on its own element type).
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Per-element cleanups (e.g. temporaries of the init expression) must run
    // inside the loop body, once per element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" although this advances the
    // *source* pointer; it only affects IR readability, not semantics.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
937 
/// Emit the lvalue for the shared (original) reduction item expression.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
941 
/// Emit the upper-bound lvalue of a reduction item. Only array sections have
/// a distinct upper bound; any other expression yields a default-constructed
/// (invalid) LValue.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}
948 
949 void ReductionCodeGen::emitAggregateInitialization(
950     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
951     const OMPDeclareReductionDecl *DRD) {
952   // Emit VarDecl with copy init for arrays.
953   // Get the address of the original variable captured in current
954   // captured region.
955   const auto *PrivateVD =
956       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
957   bool EmitDeclareReductionInit =
958       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
959   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
960                        EmitDeclareReductionInit,
961                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
962                                                 : PrivateVD->getInit(),
963                        DRD, SharedLVal.getAddress());
964 }
965 
966 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
967                                    ArrayRef<const Expr *> Privates,
968                                    ArrayRef<const Expr *> ReductionOps) {
969   ClausesData.reserve(Shareds.size());
970   SharedAddresses.reserve(Shareds.size());
971   Sizes.reserve(Shareds.size());
972   BaseDecls.reserve(Shareds.size());
973   auto IPriv = Privates.begin();
974   auto IRed = ReductionOps.begin();
975   for (const Expr *Ref : Shareds) {
976     ClausesData.emplace_back(Ref, *IPriv, *IRed);
977     std::advance(IPriv, 1);
978     std::advance(IRed, 1);
979   }
980 }
981 
982 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
983   assert(SharedAddresses.size() == N &&
984          "Number of generated lvalues must be exactly N.");
985   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
986   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
987   SharedAddresses.emplace_back(First, Second);
988 }
989 
/// Compute and record the size of reduction item \p N as the pair
/// <size in chars, number of elements>. The element count is only non-null
/// for variably-modified types (VLAs and array sections).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the static type size suffices.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB - LB) + 1, using the bounds
    // recorded by emitSharedLValue/emitSharedLValueUB.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: derive the element count from the dynamic total size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1027 
/// Emit the variably-modified type of reduction item \p N using an
/// externally supplied element count \p Size. \p Size must be null for items
/// that are not variably modified.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Nothing to emit for constant-sized items.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size and re-emit the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1046 
/// Emit initialization of the private copy for reduction item \p N.
/// \param PrivateAddr Address of the already-allocated private copy.
/// \param SharedLVal LValue of the original (shared) item.
/// \param DefaultInit Callback that may perform default initialization;
/// returning true suppresses the fallback emission of the private variable's
/// own initializer below.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to their declared memory representations.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1077 
1078 bool ReductionCodeGen::needCleanups(unsigned N) {
1079   const auto *PrivateVD =
1080       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1081   QualType PrivateType = PrivateVD->getType();
1082   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1083   return DTorKind != QualType::DK_none;
1084 }
1085 
1086 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1087                                     Address PrivateAddr) {
1088   const auto *PrivateVD =
1089       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1090   QualType PrivateType = PrivateVD->getType();
1091   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1092   if (needCleanups(N)) {
1093     PrivateAddr = CGF.Builder.CreateElementBitCast(
1094         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1095     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1096   }
1097 }
1098 
/// Walk down the pointer/reference levels of \p BaseTy, loading through each
/// level, until reaching \p ElTy (or a non-pointer/non-reference type), then
/// bitcast the resulting address to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference level: load the referenced address.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1118 
/// Reconstruct an address usable as \p BaseTy from the raw pointer \p Addr:
/// for every pointer/reference level between \p BaseTy and \p ElTy a stack
/// temporary is created and chained (each temporary stores the pointer to
/// the next), so that loading through the original type's shape eventually
/// reaches \p Addr. When no indirection levels are involved, returns
/// \p Addr cast to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; link it into the chain.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the target address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1146 
1147 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1148   const VarDecl *OrigVD = nullptr;
1149   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1150     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1151     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1152       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1153     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1154       Base = TempASE->getBase()->IgnoreParenImpCasts();
1155     DE = cast<DeclRefExpr>(Base);
1156     OrigVD = cast<VarDecl>(DE->getDecl());
1157   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1158     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1159     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1160       Base = TempASE->getBase()->IgnoreParenImpCasts();
1161     DE = cast<DeclRefExpr>(Base);
1162     OrigVD = cast<VarDecl>(DE->getDecl());
1163   }
1164   return OrigVD;
1165 }
1166 
/// Adjust the private address of reduction item \p N when the item is an
/// array section or subscript: the private copy covers only the section, so
/// rebase the private pointer by the offset of the shared section from its
/// base variable, making indexing with the original indices land in the
/// private storage. Plain variables are returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through pointers/references down to the section's element type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base pointer relative to the start of the
    // shared section; typically non-positive.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    // private_base = private + (base - shared_start).
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Re-wrap the adjusted pointer in the shape of the original base type.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1192 
1193 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1194   const OMPDeclareReductionDecl *DRD =
1195       getReductionInit(ClausesData[N].ReductionOp);
1196   return DRD && DRD->getInitializer();
1197 }
1198 
/// The thread-id variable of an outlined region is a pointer parameter;
/// dereference it to obtain an lvalue for the thread id itself.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1204 
/// Emit the region body wrapped in a terminate scope, enforcing the OpenMP
/// structured-block single-entry/single-exit rule against exceptions.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // The terminate scope makes any exception escaping the body abort rather
  // than unwind out of the region.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1217 
/// For task regions the thread-id variable's local storage is used directly
/// (unlike the parallel case above, no pointer dereference is emitted).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1224 
1225 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1226                                        QualType FieldTy) {
1227   auto *Field = FieldDecl::Create(
1228       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1229       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1230       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1231   Field->setAccess(AS_public);
1232   DC->addDecl(Field);
1233   return Field;
1234 }
1235 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  // Build the ident_t struct used to pass source-location information to the
  // OpenMP runtime. Field order must match the runtime's own definition.
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name: an array of 8 int32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Populate offloading entry information from module metadata, if present.
  loadOffloadInfoMetadata();
}
1261 
1262 void CGOpenMPRuntime::clear() {
1263   InternalVars.clear();
1264   // Clean non-target variable declarations possibly used only in debug info.
1265   for (const auto &Data : EmittedNonTargetVariables) {
1266     if (!Data.getValue().pointsToAliveValue())
1267       continue;
1268     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1269     if (!GV)
1270       continue;
1271     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1272       continue;
1273     GV->eraseFromParent();
1274   }
1275 }
1276 
1277 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1278   SmallString<128> Buffer;
1279   llvm::raw_svector_ostream OS(Buffer);
1280   StringRef Sep = FirstSeparator;
1281   for (StringRef Part : Parts) {
1282     OS << Sep << Part;
1283     Sep = Separator;
1284   }
1285   return OS.str();
1286 }
1287 
/// Emit the helper function for either the combiner or the initializer of a
/// 'declare reduction' construct:
///   void .omp_combiner.(Ty *omp_out, Ty *omp_in)   (or .omp_initializer.)
/// \param CombinerInitializer Expression to emit in the body; may be null
/// for a non-call initializer, in which case only \p Out's own initializer
/// is emitted. \param In/\p Out The omp_in/omp_orig and omp_out/omp_priv
/// variables of the construct. \param IsCombiner Selects the name and
/// whether \p Out's initializer is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified pointers to Ty.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The trailing empty part makes the name end with the separator, e.g.
  // ".omp_combiner.".
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // In optimized builds, force-inline this small helper.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, emit Out's own (non-trivial) initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1344 
/// Emit (at most once per declaration) the combiner and optional initializer
/// functions for a 'declare reduction' construct and cache them in UDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only pass the initializer expression through for direct-call
    // initializers; otherwise the helper emits just omp_priv's own init.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Record that this UDR was emitted while generating CGF->CurFn.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1370 
1371 std::pair<llvm::Function *, llvm::Function *>
1372 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1373   auto I = UDRMap.find(D);
1374   if (I != UDRMap.end())
1375     return I->second;
1376   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1377   return UDRMap.lookup(D);
1378 }
1379 
/// Create the outlined function for a 'parallel' or 'teams' region.
/// \param ThreadIDVar Variable bound to the thread id; must have type
/// kmp_int32 * (checked by the assert below).
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the region may be cancelled: only these directive
  // kinds carry a 'cancel' flag.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  // Outline the captured statement under a region-info RAII scope.
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
1409 
1410 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1411     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1412     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1413   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1414   return emitParallelOrTeamsOutlinedFunction(
1415       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1416 }
1417 
1418 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1419     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1420     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1421   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1422   return emitParallelOrTeamsOutlinedFunction(
1423       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1424 }
1425 
/// Emit the outlined function for a 'task' (or taskloop) directive.
/// \param ThreadIDVar captured thread-id variable; for tasks it must have
///        value type kmp_int32 (not a pointer), asserted below.
/// \param PartIDVar variable identifying the current task part (untied tasks).
/// \param TaskTVar captured kmp_task_t* for the task being executed.
/// \param Tied true for tied tasks; for untied tasks, \p NumberOfParts is
///        updated with the number of generated task parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, each part ends by re-enqueueing the task itself via
  // __kmpc_omp_task(loc, gtid, task_t*).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  // The action wraps CodeGen so untied tasks emit the re-enqueue sequence at
  // part boundaries; it also counts the parts queried below.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture their body under OMPD_taskloop, plain tasks
  // under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' directives carry a cancel flag here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1462 
1463 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1464                              const RecordDecl *RD, const CGRecordLayout &RL,
1465                              ArrayRef<llvm::Constant *> Data) {
1466   llvm::StructType *StructTy = RL.getLLVMType();
1467   unsigned PrevIdx = 0;
1468   ConstantInitBuilder CIBuilder(CGM);
1469   auto DI = Data.begin();
1470   for (const FieldDecl *FD : RD->fields()) {
1471     unsigned Idx = RL.getLLVMFieldNo(FD);
1472     // Fill the alignment.
1473     for (unsigned I = PrevIdx; I < Idx; ++I)
1474       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1475     PrevIdx = Idx + 1;
1476     Fields.add(*DI);
1477     ++DI;
1478   }
1479 }
1480 
1481 template <class... As>
1482 static llvm::GlobalVariable *
1483 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1484                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1485                    As &&... Args) {
1486   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488   ConstantInitBuilder CIBuilder(CGM);
1489   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490   buildStructValue(Fields, CGM, RD, RL, Data);
1491   return Fields.finishAndCreateGlobal(
1492       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493       std::forward<As>(Args)...);
1494 }
1495 
1496 template <typename T>
1497 static void
1498 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1499                                          ArrayRef<llvm::Constant *> Data,
1500                                          T &Parent) {
1501   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1502   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1503   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1504   buildStructValue(Fields, CGM, RD, RL, Data);
1505   Fields.finishAndAddTo(Parent);
1506 }
1507 
/// Return (creating on first use) the default ident_t location object for the
/// given \p Flags, used when no debug info / source location is available.
/// Results are cached per (Flags, Reserved2Flags) pair in OpenMPDefaultLocMap.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // ident_t fields: reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // The global is never addressed by identity, so allow merging.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1540 
1541 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1542                                              bool AtCurrentPoint) {
1543   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1544   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1545 
1546   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1547   if (AtCurrentPoint) {
1548     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1549         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1550   } else {
1551     Elem.second.ServiceInsertPt =
1552         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1553     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1554   }
1555 }
1556 
1557 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1558   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1559   if (Elem.second.ServiceInsertPt) {
1560     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1561     Elem.second.ServiceInsertPt = nullptr;
1562     Ptr->eraseFromParent();
1563   }
1564 }
1565 
/// Emit (or reuse) an ident_t* location argument describing \p Loc for a
/// runtime call in \p CGF. Falls back to a shared default location when no
/// debug info is requested or \p Loc is invalid; otherwise fills the
/// per-function .kmpc_loc.addr temporary and stores a ";file;func;line;col;;"
/// string into its psource field.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default location at the service
    // insert point so it happens once, early in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Cache the generated location string per raw source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1626 
/// Return the OpenMP global thread id for the current function, caching it in
/// OpenMPLocThreadIDMap. Prefers a thread-id passed in as an outlined-region
/// argument; otherwise emits a __kmpc_global_thread_num call at the service
/// insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point so the id is computed
  // once, early in the function, regardless of where it is first needed.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1677 
1678 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1679   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1680   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1681     clearLocThreadIdInsertPt(CGF);
1682     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1683   }
1684   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1685     for(auto *D : FunctionUDRMap[CGF.CurFn])
1686       UDRMap.erase(D);
1687     FunctionUDRMap.erase(CGF.CurFn);
1688   }
1689 }
1690 
1691 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1692   return IdentTy->getPointerTo();
1693 }
1694 
1695 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1696   if (!Kmpc_MicroTy) {
1697     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1698     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1699                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1700     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1701   }
1702   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1703 }
1704 
1705 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1706   llvm::FunctionCallee RTLFn = nullptr;
1707   switch (static_cast<OpenMPRTLFunction>(Function)) {
1708   case OMPRTL__kmpc_fork_call: {
1709     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1710     // microtask, ...);
1711     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1712                                 getKmpc_MicroPointerTy()};
1713     auto *FnTy =
1714         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1715     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1716     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1717       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1718         llvm::LLVMContext &Ctx = F->getContext();
1719         llvm::MDBuilder MDB(Ctx);
1720         // Annotate the callback behavior of the __kmpc_fork_call:
1721         //  - The callback callee is argument number 2 (microtask).
1722         //  - The first two arguments of the callback callee are unknown (-1).
1723         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1724         //    callback callee.
1725         F->addMetadata(
1726             llvm::LLVMContext::MD_callback,
1727             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1728                                         2, {-1, -1},
1729                                         /* VarArgsArePassed */ true)}));
1730       }
1731     }
1732     break;
1733   }
1734   case OMPRTL__kmpc_global_thread_num: {
1735     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1736     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1737     auto *FnTy =
1738         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1739     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1740     break;
1741   }
1742   case OMPRTL__kmpc_threadprivate_cached: {
1743     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1744     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1745     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1746                                 CGM.VoidPtrTy, CGM.SizeTy,
1747                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1751     break;
1752   }
1753   case OMPRTL__kmpc_critical: {
1754     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1755     // kmp_critical_name *crit);
1756     llvm::Type *TypeParams[] = {
1757         getIdentTyPointerTy(), CGM.Int32Ty,
1758         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1762     break;
1763   }
1764   case OMPRTL__kmpc_critical_with_hint: {
1765     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit, uintptr_t hint);
1767     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1768                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1769                                 CGM.IntPtrTy};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1773     break;
1774   }
1775   case OMPRTL__kmpc_threadprivate_register: {
1776     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1777     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1778     // typedef void *(*kmpc_ctor)(void *);
1779     auto *KmpcCtorTy =
1780         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1781                                 /*isVarArg*/ false)->getPointerTo();
1782     // typedef void *(*kmpc_cctor)(void *, void *);
1783     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1784     auto *KmpcCopyCtorTy =
1785         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1786                                 /*isVarArg*/ false)
1787             ->getPointerTo();
1788     // typedef void (*kmpc_dtor)(void *);
1789     auto *KmpcDtorTy =
1790         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1791             ->getPointerTo();
1792     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1793                               KmpcCopyCtorTy, KmpcDtorTy};
1794     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1795                                         /*isVarArg*/ false);
1796     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1797     break;
1798   }
1799   case OMPRTL__kmpc_end_critical: {
1800     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1801     // kmp_critical_name *crit);
1802     llvm::Type *TypeParams[] = {
1803         getIdentTyPointerTy(), CGM.Int32Ty,
1804         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1805     auto *FnTy =
1806         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1808     break;
1809   }
1810   case OMPRTL__kmpc_cancel_barrier: {
1811     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1812     // global_tid);
1813     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814     auto *FnTy =
1815         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1816     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1817     break;
1818   }
1819   case OMPRTL__kmpc_barrier: {
1820     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1821     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1822     auto *FnTy =
1823         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1824     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1825     break;
1826   }
1827   case OMPRTL__kmpc_for_static_fini: {
1828     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1829     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1830     auto *FnTy =
1831         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1832     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1833     break;
1834   }
1835   case OMPRTL__kmpc_push_num_threads: {
1836     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1837     // kmp_int32 num_threads)
1838     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1839                                 CGM.Int32Ty};
1840     auto *FnTy =
1841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1843     break;
1844   }
1845   case OMPRTL__kmpc_serialized_parallel: {
1846     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1847     // global_tid);
1848     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1849     auto *FnTy =
1850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1851     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1852     break;
1853   }
1854   case OMPRTL__kmpc_end_serialized_parallel: {
1855     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1856     // global_tid);
1857     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1858     auto *FnTy =
1859         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1861     break;
1862   }
1863   case OMPRTL__kmpc_flush: {
1864     // Build void __kmpc_flush(ident_t *loc);
1865     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1866     auto *FnTy =
1867         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1868     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1869     break;
1870   }
1871   case OMPRTL__kmpc_master: {
1872     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1873     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1874     auto *FnTy =
1875         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1877     break;
1878   }
1879   case OMPRTL__kmpc_end_master: {
1880     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1881     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1882     auto *FnTy =
1883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1884     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1885     break;
1886   }
1887   case OMPRTL__kmpc_omp_taskyield: {
1888     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1889     // int end_part);
1890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1891     auto *FnTy =
1892         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1893     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1894     break;
1895   }
1896   case OMPRTL__kmpc_single: {
1897     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1898     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899     auto *FnTy =
1900         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1902     break;
1903   }
1904   case OMPRTL__kmpc_end_single: {
1905     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1906     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1907     auto *FnTy =
1908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1910     break;
1911   }
1912   case OMPRTL__kmpc_omp_task_alloc: {
1913     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1914     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1915     // kmp_routine_entry_t *task_entry);
1916     assert(KmpRoutineEntryPtrTy != nullptr &&
1917            "Type kmp_routine_entry_t must be created.");
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1919                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1920     // Return void * and then cast to particular kmp_task_t type.
1921     auto *FnTy =
1922         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1923     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1924     break;
1925   }
1926   case OMPRTL__kmpc_omp_target_task_alloc: {
1927     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1928     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1929     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1930     assert(KmpRoutineEntryPtrTy != nullptr &&
1931            "Type kmp_routine_entry_t must be created.");
1932     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1933                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1934                                 CGM.Int64Ty};
1935     // Return void * and then cast to particular kmp_task_t type.
1936     auto *FnTy =
1937         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1938     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1939     break;
1940   }
1941   case OMPRTL__kmpc_omp_task: {
1942     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1943     // *new_task);
1944     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1945                                 CGM.VoidPtrTy};
1946     auto *FnTy =
1947         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1948     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1949     break;
1950   }
1951   case OMPRTL__kmpc_copyprivate: {
1952     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1953     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1954     // kmp_int32 didit);
1955     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1956     auto *CpyFnTy =
1957         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1958     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1959                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1960                                 CGM.Int32Ty};
1961     auto *FnTy =
1962         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1963     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1964     break;
1965   }
1966   case OMPRTL__kmpc_reduce: {
1967     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1968     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1969     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1970     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1971     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1972                                                /*isVarArg=*/false);
1973     llvm::Type *TypeParams[] = {
1974         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1975         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1976         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1977     auto *FnTy =
1978         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1979     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1980     break;
1981   }
1982   case OMPRTL__kmpc_reduce_nowait: {
1983     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1984     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1985     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1986     // *lck);
1987     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1988     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1989                                                /*isVarArg=*/false);
1990     llvm::Type *TypeParams[] = {
1991         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1992         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1993         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1994     auto *FnTy =
1995         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1996     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1997     break;
1998   }
1999   case OMPRTL__kmpc_end_reduce: {
2000     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2001     // kmp_critical_name *lck);
2002     llvm::Type *TypeParams[] = {
2003         getIdentTyPointerTy(), CGM.Int32Ty,
2004         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2008     break;
2009   }
2010   case OMPRTL__kmpc_end_reduce_nowait: {
2011     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2012     // kmp_critical_name *lck);
2013     llvm::Type *TypeParams[] = {
2014         getIdentTyPointerTy(), CGM.Int32Ty,
2015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018     RTLFn =
2019         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2020     break;
2021   }
2022   case OMPRTL__kmpc_omp_task_begin_if0: {
2023     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2024     // *new_task);
2025     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2026                                 CGM.VoidPtrTy};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn =
2030         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2031     break;
2032   }
2033   case OMPRTL__kmpc_omp_task_complete_if0: {
2034     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2035     // *new_task);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037                                 CGM.VoidPtrTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2041                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2042     break;
2043   }
2044   case OMPRTL__kmpc_ordered: {
2045     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2046     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2047     auto *FnTy =
2048         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2050     break;
2051   }
2052   case OMPRTL__kmpc_end_ordered: {
2053     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2054     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2055     auto *FnTy =
2056         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2057     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2058     break;
2059   }
2060   case OMPRTL__kmpc_omp_taskwait: {
2061     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2062     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2063     auto *FnTy =
2064         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2065     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2066     break;
2067   }
2068   case OMPRTL__kmpc_taskgroup: {
2069     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2070     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2071     auto *FnTy =
2072         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2074     break;
2075   }
2076   case OMPRTL__kmpc_end_taskgroup: {
2077     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2078     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2079     auto *FnTy =
2080         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2081     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2082     break;
2083   }
2084   case OMPRTL__kmpc_push_proc_bind: {
2085     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2086     // int proc_bind)
2087     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2088     auto *FnTy =
2089         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2090     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2091     break;
2092   }
2093   case OMPRTL__kmpc_omp_task_with_deps: {
2094     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2095     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2096     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2097     llvm::Type *TypeParams[] = {
2098         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2099         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2100     auto *FnTy =
2101         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2102     RTLFn =
2103         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2104     break;
2105   }
2106   case OMPRTL__kmpc_omp_wait_deps: {
2107     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2108     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2109     // kmp_depend_info_t *noalias_dep_list);
2110     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2111                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2112                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2113     auto *FnTy =
2114         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2115     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2116     break;
2117   }
2118   case OMPRTL__kmpc_cancellationpoint: {
2119     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2120     // global_tid, kmp_int32 cncl_kind)
2121     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2122     auto *FnTy =
2123         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2124     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2125     break;
2126   }
2127   case OMPRTL__kmpc_cancel: {
2128     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2129     // kmp_int32 cncl_kind)
2130     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2131     auto *FnTy =
2132         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2133     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2134     break;
2135   }
2136   case OMPRTL__kmpc_push_num_teams: {
2137     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2138     // kmp_int32 num_teams, kmp_int32 num_threads)
2139     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2140         CGM.Int32Ty};
2141     auto *FnTy =
2142         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2143     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2144     break;
2145   }
2146   case OMPRTL__kmpc_fork_teams: {
2147     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2148     // microtask, ...);
2149     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150                                 getKmpc_MicroPointerTy()};
2151     auto *FnTy =
2152         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2153     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2154     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2155       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2156         llvm::LLVMContext &Ctx = F->getContext();
2157         llvm::MDBuilder MDB(Ctx);
2158         // Annotate the callback behavior of the __kmpc_fork_teams:
2159         //  - The callback callee is argument number 2 (microtask).
2160         //  - The first two arguments of the callback callee are unknown (-1).
2161         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2162         //    callback callee.
2163         F->addMetadata(
2164             llvm::LLVMContext::MD_callback,
2165             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2166                                         2, {-1, -1},
2167                                         /* VarArgsArePassed */ true)}));
2168       }
2169     }
2170     break;
2171   }
2172   case OMPRTL__kmpc_taskloop: {
2173     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2174     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2175     // sched, kmp_uint64 grainsize, void *task_dup);
2176     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2177                                 CGM.IntTy,
2178                                 CGM.VoidPtrTy,
2179                                 CGM.IntTy,
2180                                 CGM.Int64Ty->getPointerTo(),
2181                                 CGM.Int64Ty->getPointerTo(),
2182                                 CGM.Int64Ty,
2183                                 CGM.IntTy,
2184                                 CGM.IntTy,
2185                                 CGM.Int64Ty,
2186                                 CGM.VoidPtrTy};
2187     auto *FnTy =
2188         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2189     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2190     break;
2191   }
2192   case OMPRTL__kmpc_doacross_init: {
2193     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2194     // num_dims, struct kmp_dim *dims);
2195     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2196                                 CGM.Int32Ty,
2197                                 CGM.Int32Ty,
2198                                 CGM.VoidPtrTy};
2199     auto *FnTy =
2200         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2201     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2202     break;
2203   }
2204   case OMPRTL__kmpc_doacross_fini: {
2205     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2207     auto *FnTy =
2208         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2209     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2210     break;
2211   }
2212   case OMPRTL__kmpc_doacross_post: {
2213     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2214     // *vec);
2215     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2216                                 CGM.Int64Ty->getPointerTo()};
2217     auto *FnTy =
2218         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2220     break;
2221   }
2222   case OMPRTL__kmpc_doacross_wait: {
2223     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2224     // *vec);
2225     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2226                                 CGM.Int64Ty->getPointerTo()};
2227     auto *FnTy =
2228         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2230     break;
2231   }
2232   case OMPRTL__kmpc_task_reduction_init: {
2233     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2234     // *data);
2235     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2236     auto *FnTy =
2237         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2238     RTLFn =
2239         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2240     break;
2241   }
2242   case OMPRTL__kmpc_task_reduction_get_th_data: {
2243     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2244     // *d);
2245     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2246     auto *FnTy =
2247         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2248     RTLFn = CGM.CreateRuntimeFunction(
2249         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2250     break;
2251   }
2252   case OMPRTL__kmpc_alloc: {
2253     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2254     // al); omp_allocator_handle_t type is void *.
2255     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2256     auto *FnTy =
2257         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2258     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2259     break;
2260   }
2261   case OMPRTL__kmpc_free: {
2262     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2263     // al); omp_allocator_handle_t type is void *.
2264     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2265     auto *FnTy =
2266         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2267     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2268     break;
2269   }
2270   case OMPRTL__kmpc_push_target_tripcount: {
2271     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2272     // size);
2273     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2274     llvm::FunctionType *FnTy =
2275         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2276     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2277     break;
2278   }
2279   case OMPRTL__tgt_target: {
2280     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2281     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2282     // *arg_types);
2283     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284                                 CGM.VoidPtrTy,
2285                                 CGM.Int32Ty,
2286                                 CGM.VoidPtrPtrTy,
2287                                 CGM.VoidPtrPtrTy,
2288                                 CGM.Int64Ty->getPointerTo(),
2289                                 CGM.Int64Ty->getPointerTo()};
2290     auto *FnTy =
2291         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2292     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2293     break;
2294   }
2295   case OMPRTL__tgt_target_nowait: {
2296     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2297     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2298     // int64_t *arg_types);
2299     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2300                                 CGM.VoidPtrTy,
2301                                 CGM.Int32Ty,
2302                                 CGM.VoidPtrPtrTy,
2303                                 CGM.VoidPtrPtrTy,
2304                                 CGM.Int64Ty->getPointerTo(),
2305                                 CGM.Int64Ty->getPointerTo()};
2306     auto *FnTy =
2307         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2308     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2309     break;
2310   }
2311   case OMPRTL__tgt_target_teams: {
2312     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2313     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2314     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2315     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2316                                 CGM.VoidPtrTy,
2317                                 CGM.Int32Ty,
2318                                 CGM.VoidPtrPtrTy,
2319                                 CGM.VoidPtrPtrTy,
2320                                 CGM.Int64Ty->getPointerTo(),
2321                                 CGM.Int64Ty->getPointerTo(),
2322                                 CGM.Int32Ty,
2323                                 CGM.Int32Ty};
2324     auto *FnTy =
2325         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2326     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2327     break;
2328   }
2329   case OMPRTL__tgt_target_teams_nowait: {
2330     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2331     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2332     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2333     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2334                                 CGM.VoidPtrTy,
2335                                 CGM.Int32Ty,
2336                                 CGM.VoidPtrPtrTy,
2337                                 CGM.VoidPtrPtrTy,
2338                                 CGM.Int64Ty->getPointerTo(),
2339                                 CGM.Int64Ty->getPointerTo(),
2340                                 CGM.Int32Ty,
2341                                 CGM.Int32Ty};
2342     auto *FnTy =
2343         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2344     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2345     break;
2346   }
2347   case OMPRTL__tgt_register_requires: {
2348     // Build void __tgt_register_requires(int64_t flags);
2349     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2350     auto *FnTy =
2351         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2352     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2353     break;
2354   }
2355   case OMPRTL__tgt_register_lib: {
2356     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2357     QualType ParamTy =
2358         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2359     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2360     auto *FnTy =
2361         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2363     break;
2364   }
2365   case OMPRTL__tgt_unregister_lib: {
2366     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2367     QualType ParamTy =
2368         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2369     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2370     auto *FnTy =
2371         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2372     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2373     break;
2374   }
2375   case OMPRTL__tgt_target_data_begin: {
2376     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2377     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2378     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379                                 CGM.Int32Ty,
2380                                 CGM.VoidPtrPtrTy,
2381                                 CGM.VoidPtrPtrTy,
2382                                 CGM.Int64Ty->getPointerTo(),
2383                                 CGM.Int64Ty->getPointerTo()};
2384     auto *FnTy =
2385         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2387     break;
2388   }
2389   case OMPRTL__tgt_target_data_begin_nowait: {
2390     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2391     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2392     // *arg_types);
2393     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394                                 CGM.Int32Ty,
2395                                 CGM.VoidPtrPtrTy,
2396                                 CGM.VoidPtrPtrTy,
2397                                 CGM.Int64Ty->getPointerTo(),
2398                                 CGM.Int64Ty->getPointerTo()};
2399     auto *FnTy =
2400         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2402     break;
2403   }
2404   case OMPRTL__tgt_target_data_end: {
2405     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2406     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2407     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408                                 CGM.Int32Ty,
2409                                 CGM.VoidPtrPtrTy,
2410                                 CGM.VoidPtrPtrTy,
2411                                 CGM.Int64Ty->getPointerTo(),
2412                                 CGM.Int64Ty->getPointerTo()};
2413     auto *FnTy =
2414         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2415     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2416     break;
2417   }
2418   case OMPRTL__tgt_target_data_end_nowait: {
2419     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2420     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2421     // *arg_types);
2422     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2423                                 CGM.Int32Ty,
2424                                 CGM.VoidPtrPtrTy,
2425                                 CGM.VoidPtrPtrTy,
2426                                 CGM.Int64Ty->getPointerTo(),
2427                                 CGM.Int64Ty->getPointerTo()};
2428     auto *FnTy =
2429         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2430     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2431     break;
2432   }
2433   case OMPRTL__tgt_target_data_update: {
2434     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2435     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2436     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2437                                 CGM.Int32Ty,
2438                                 CGM.VoidPtrPtrTy,
2439                                 CGM.VoidPtrPtrTy,
2440                                 CGM.Int64Ty->getPointerTo(),
2441                                 CGM.Int64Ty->getPointerTo()};
2442     auto *FnTy =
2443         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2444     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2445     break;
2446   }
2447   case OMPRTL__tgt_target_data_update_nowait: {
2448     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2449     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2450     // *arg_types);
2451     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2452                                 CGM.Int32Ty,
2453                                 CGM.VoidPtrPtrTy,
2454                                 CGM.VoidPtrPtrTy,
2455                                 CGM.Int64Ty->getPointerTo(),
2456                                 CGM.Int64Ty->getPointerTo()};
2457     auto *FnTy =
2458         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2459     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2460     break;
2461   }
2462   }
2463   assert(RTLFn && "Unable to find OpenMP runtime function");
2464   return RTLFn;
2465 }
2466 
2467 llvm::FunctionCallee
2468 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2469   assert((IVSize == 32 || IVSize == 64) &&
2470          "IV size is not compatible with the omp runtime");
2471   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2472                                             : "__kmpc_for_static_init_4u")
2473                                 : (IVSigned ? "__kmpc_for_static_init_8"
2474                                             : "__kmpc_for_static_init_8u");
2475   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2476   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2477   llvm::Type *TypeParams[] = {
2478     getIdentTyPointerTy(),                     // loc
2479     CGM.Int32Ty,                               // tid
2480     CGM.Int32Ty,                               // schedtype
2481     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2482     PtrTy,                                     // p_lower
2483     PtrTy,                                     // p_upper
2484     PtrTy,                                     // p_stride
2485     ITy,                                       // incr
2486     ITy                                        // chunk
2487   };
2488   auto *FnTy =
2489       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490   return CGM.CreateRuntimeFunction(FnTy, Name);
2491 }
2492 
2493 llvm::FunctionCallee
2494 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2495   assert((IVSize == 32 || IVSize == 64) &&
2496          "IV size is not compatible with the omp runtime");
2497   StringRef Name =
2498       IVSize == 32
2499           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2500           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2501   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2502   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2503                                CGM.Int32Ty,           // tid
2504                                CGM.Int32Ty,           // schedtype
2505                                ITy,                   // lower
2506                                ITy,                   // upper
2507                                ITy,                   // stride
2508                                ITy                    // chunk
2509   };
2510   auto *FnTy =
2511       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2512   return CGM.CreateRuntimeFunction(FnTy, Name);
2513 }
2514 
2515 llvm::FunctionCallee
2516 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2517   assert((IVSize == 32 || IVSize == 64) &&
2518          "IV size is not compatible with the omp runtime");
2519   StringRef Name =
2520       IVSize == 32
2521           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2522           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2523   llvm::Type *TypeParams[] = {
2524       getIdentTyPointerTy(), // loc
2525       CGM.Int32Ty,           // tid
2526   };
2527   auto *FnTy =
2528       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2529   return CGM.CreateRuntimeFunction(FnTy, Name);
2530 }
2531 
2532 llvm::FunctionCallee
2533 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2534   assert((IVSize == 32 || IVSize == 64) &&
2535          "IV size is not compatible with the omp runtime");
2536   StringRef Name =
2537       IVSize == 32
2538           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2539           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2540   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2541   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2542   llvm::Type *TypeParams[] = {
2543     getIdentTyPointerTy(),                     // loc
2544     CGM.Int32Ty,                               // tid
2545     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2546     PtrTy,                                     // p_lower
2547     PtrTy,                                     // p_upper
2548     PtrTy                                      // p_stride
2549   };
2550   auto *FnTy =
2551       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2552   return CGM.CreateRuntimeFunction(FnTy, Name);
2553 }
2554 
2555 /// Obtain information that uniquely identifies a target entry. This
2556 /// consists of the file and device IDs as well as line number associated with
2557 /// the relevant entry source location.
2558 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2559                                      unsigned &DeviceID, unsigned &FileID,
2560                                      unsigned &LineNum) {
2561   SourceManager &SM = C.getSourceManager();
2562 
2563   // The loc should be always valid and have a file ID (the user cannot use
2564   // #pragma directives in macros)
2565 
2566   assert(Loc.isValid() && "Source location is expected to be always valid.");
2567 
2568   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2569   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2570 
2571   llvm::sys::fs::UniqueID ID;
2572   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2573     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2574         << PLoc.getFilename() << EC.message();
2575 
2576   DeviceID = ID.getDevice();
2577   FileID = ID.getFile();
2578   LineNum = PLoc.getLine();
2579 }
2580 
// Returns the address of the reference pointer created for a 'declare target'
// variable, or an invalid Address when no such pointer applies to \p VD.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no target code is emitted, so no reference pointer is
  // needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A reference pointer is emitted only for 'declare target link' variables,
  // or for 'declare target to' variables when unified shared memory has been
  // required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: "<mangled-name>[_<fileid-hex>]_decl_tgt_ref_ptr".
    // The file ID is mixed in for internal-linkage variables so the name stays
    // unique across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Reuse the pointer if it has already been created in this module;
    // otherwise create it lazily.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      // Weak linkage allows the same pointer to be emitted from multiple
      // translation units without a duplicate-symbol error.
      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the address of the
      // original variable; on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2619 
2620 llvm::Constant *
2621 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2622   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2623          !CGM.getContext().getTargetInfo().isTLSSupported());
2624   // Lookup the entry, lazily creating it if necessary.
2625   std::string Suffix = getName({"cache", ""});
2626   return getOrCreateInternalVariable(
2627       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2628 }
2629 
2630 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2631                                                 const VarDecl *VD,
2632                                                 Address VDAddr,
2633                                                 SourceLocation Loc) {
2634   if (CGM.getLangOpts().OpenMPUseTLS &&
2635       CGM.getContext().getTargetInfo().isTLSSupported())
2636     return VDAddr;
2637 
2638   llvm::Type *VarTy = VDAddr.getElementType();
2639   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2640                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2641                                                        CGM.Int8PtrTy),
2642                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2643                          getOrCreateThreadPrivateCache(VD)};
2644   return Address(CGF.EmitRuntimeCall(
2645       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2646                  VDAddr.getAlignment());
2647 }
2648 
2649 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2650     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2651     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2652   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2653   // library.
2654   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2655   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2656                       OMPLoc);
2657   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2658   // to register constructor/destructor for variable.
2659   llvm::Value *Args[] = {
2660       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2661       Ctor, CopyCtor, Dtor};
2662   CGF.EmitRuntimeCall(
2663       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2664 }
2665 
/// Emits, if required, the helpers that initialize a
/// '#pragma omp threadprivate' variable and registers its
/// constructor/destructor with the OpenMP runtime.
/// \param VD The threadprivate variable (any redeclaration; resolved to its
///        definition below).
/// \param VDAddr Address of the original (master) copy of the variable.
/// \param Loc Source location used for the generated helpers and calls.
/// \param PerformInit Whether the initializer must be re-run for each
///        thread-local copy.
/// \param CGF Function to emit the registration call into; when null, a
///        standalone "__omp_threadprivate_init_" function is created and
///        returned instead.
/// \return The standalone initialization function, or nullptr when nothing
///         needs emitting (TLS is used, the variable was already processed,
///         or no ctor/dtor is required and registration happened inline).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With real TLS available and enabled, no runtime registration is needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the helpers only once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Its shape is
      //   void *__kmpc_global_ctor_(void *dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and run the initializer on the
      // storage it points at.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the same destination pointer.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Its shape is
      //   void __kmpc_global_dtor_(void *obj).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are filled with null pointers of the matching
    // function-pointer type.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration call in a standalone
      // nullary initializer function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2785 
/// Emits ctor/dtor offload entries for a 'declare target' variable that
/// needs dynamic initialization and/or destruction.
/// \param Addr The emitted global for the variable.
/// \param PerformInit Whether the variable's initializer must be run.
/// \return CGM.getLangOpts().OpenMPIsDevice (callers appear to use this to
///         decide whether the normal emission path should be suppressed on
///         the device — confirm at the call sites).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do for non-declare-target declarations, 'link' variables, or
  // 'to' variables under the unified-shared-memory requirement.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Process each definition only once.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the ctor used so it survives even without direct IR references.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, emit only a private one-byte placeholder global; it
      // serves as both the entry's address and its unique ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the dtor used so it survives even without direct IR references.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2895 
2896 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2897                                                           QualType VarType,
2898                                                           StringRef Name) {
2899   std::string Suffix = getName({"artificial", ""});
2900   std::string CacheSuffix = getName({"cache", ""});
2901   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2902   llvm::Value *GAddr =
2903       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2904   llvm::Value *Args[] = {
2905       emitUpdateLocation(CGF, SourceLocation()),
2906       getThreadID(CGF, SourceLocation()),
2907       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2908       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2909                                 /*isSigned=*/false),
2910       getOrCreateInternalVariable(
2911           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2912   return Address(
2913       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2914           CGF.EmitRuntimeCall(
2915               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2916           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2917       CGM.getPointerAlign());
2918 }
2919 
2920 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2921                                       const RegionCodeGenTy &ThenGen,
2922                                       const RegionCodeGenTy &ElseGen) {
2923   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2924 
2925   // If the condition constant folds and can be elided, try to avoid emitting
2926   // the condition and the dead arm of the if/else.
2927   bool CondConstant;
2928   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2929     if (CondConstant)
2930       ThenGen(CGF);
2931     else
2932       ElseGen(CGF);
2933     return;
2934   }
2935 
2936   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2937   // emit the conditional branch.
2938   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2939   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2940   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2941   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2942 
2943   // Emit the 'then' code.
2944   CGF.EmitBlock(ThenBlock);
2945   ThenGen(CGF);
2946   CGF.EmitBranch(ContBlock);
2947   // Emit the 'else' code if present.
2948   // There is no need to emit line number for unconditional branch.
2949   (void)ApplyDebugLocation::CreateEmpty(CGF);
2950   CGF.EmitBlock(ElseBlock);
2951   ElseGen(CGF);
2952   // There is no need to emit line number for unconditional branch.
2953   (void)ApplyDebugLocation::CreateEmpty(CGF);
2954   CGF.EmitBranch(ContBlock);
2955   // Emit the continuation block for code after the if.
2956   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2957 }
2958 
/// Emits code for a 'parallel' directive.
/// When the 'if' clause is absent or true, this is a single
/// __kmpc_fork_call(loc, n, microtask, var1, ..., varn).  When it is false,
/// the region runs serialized: __kmpc_serialized_parallel, a direct call of
/// the outlined function, then __kmpc_end_serialized_parallel.
/// \param OutlinedFn The outlined parallel region body.
/// \param CapturedVars Captured values forwarded to \p OutlinedFn.
/// \param IfCond Condition of the 'if' clause, or nullptr when absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: hand the microtask and captured vars to the runtime.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    // 'if' clause present: branch between the two code paths at runtime.
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3016 
3017 // If we're inside an (outlined) parallel region, use the region info's
3018 // thread-ID variable (it is passed in a first argument of the outlined function
3019 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3020 // regular serial code region, get thread ID by calling kmp_int32
3021 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3022 // return the address of that temp.
3023 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3024                                              SourceLocation Loc) {
3025   if (auto *OMPRegionInfo =
3026           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3027     if (OMPRegionInfo->getThreadIDVariable())
3028       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3029 
3030   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3031   QualType Int32Ty =
3032       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3033   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3034   CGF.EmitStoreOfScalar(ThreadID,
3035                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3036 
3037   return ThreadIDTemp;
3038 }
3039 
3040 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3041     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3042   SmallString<256> Buffer;
3043   llvm::raw_svector_ostream Out(Buffer);
3044   Out << Name;
3045   StringRef RuntimeName = Out.str();
3046   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3047   if (Elem.second) {
3048     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3049            "OMP internal variable has different type than requested");
3050     return &*Elem.second;
3051   }
3052 
3053   return Elem.second = new llvm::GlobalVariable(
3054              CGM.getModule(), Ty, /*IsConstant*/ false,
3055              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3056              Elem.first(), /*InsertBefore=*/nullptr,
3057              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3058 }
3059 
3060 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3061   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3062   std::string Name = getName({Prefix, "var"});
3063   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3064 }
3065 
3066 namespace {
3067 /// Common pre(post)-action for different OpenMP constructs.
3068 class CommonActionTy final : public PrePostActionTy {
3069   llvm::FunctionCallee EnterCallee;
3070   ArrayRef<llvm::Value *> EnterArgs;
3071   llvm::FunctionCallee ExitCallee;
3072   ArrayRef<llvm::Value *> ExitArgs;
3073   bool Conditional;
3074   llvm::BasicBlock *ContBlock = nullptr;
3075 
3076 public:
3077   CommonActionTy(llvm::FunctionCallee EnterCallee,
3078                  ArrayRef<llvm::Value *> EnterArgs,
3079                  llvm::FunctionCallee ExitCallee,
3080                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3081       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3082         ExitArgs(ExitArgs), Conditional(Conditional) {}
3083   void Enter(CodeGenFunction &CGF) override {
3084     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3085     if (Conditional) {
3086       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3087       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3088       ContBlock = CGF.createBasicBlock("omp_if.end");
3089       // Generate the branch (If-stmt)
3090       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3091       CGF.EmitBlock(ThenBlock);
3092     }
3093   }
3094   void Done(CodeGenFunction &CGF) {
3095     // Emit the rest of blocks/branches
3096     CGF.EmitBranch(ContBlock);
3097     CGF.EmitBlock(ContBlock, true);
3098   }
3099   void Exit(CodeGenFunction &CGF) override {
3100     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3101   }
3102 };
3103 } // anonymous namespace
3104 
3105 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3106                                          StringRef CriticalName,
3107                                          const RegionCodeGenTy &CriticalOpGen,
3108                                          SourceLocation Loc, const Expr *Hint) {
3109   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3110   // CriticalOpGen();
3111   // __kmpc_end_critical(ident_t *, gtid, Lock);
3112   // Prepare arguments and build a call to __kmpc_critical
3113   if (!CGF.HaveInsertPoint())
3114     return;
3115   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3116                          getCriticalRegionLock(CriticalName)};
3117   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3118                                                 std::end(Args));
3119   if (Hint) {
3120     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3121         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3122   }
3123   CommonActionTy Action(
3124       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3125                                  : OMPRTL__kmpc_critical),
3126       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3127   CriticalOpGen.setAction(Action);
3128   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3129 }
3130 
3131 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3132                                        const RegionCodeGenTy &MasterOpGen,
3133                                        SourceLocation Loc) {
3134   if (!CGF.HaveInsertPoint())
3135     return;
3136   // if(__kmpc_master(ident_t *, gtid)) {
3137   //   MasterOpGen();
3138   //   __kmpc_end_master(ident_t *, gtid);
3139   // }
3140   // Prepare arguments and build a call to __kmpc_master
3141   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3142   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3143                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3144                         /*Conditional=*/true);
3145   MasterOpGen.setAction(Action);
3146   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3147   Action.Done(CGF);
3148 }
3149 
3150 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3151                                         SourceLocation Loc) {
3152   if (!CGF.HaveInsertPoint())
3153     return;
3154   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3155   llvm::Value *Args[] = {
3156       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3157       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3158   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3159   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3160     Region->emitUntiedSwitch(CGF);
3161 }
3162 
3163 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3164                                           const RegionCodeGenTy &TaskgroupOpGen,
3165                                           SourceLocation Loc) {
3166   if (!CGF.HaveInsertPoint())
3167     return;
3168   // __kmpc_taskgroup(ident_t *, gtid);
3169   // TaskgroupOpGen();
3170   // __kmpc_end_taskgroup(ident_t *, gtid);
3171   // Prepare arguments and build a call to __kmpc_taskgroup
3172   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3173   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3174                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3175                         Args);
3176   TaskgroupOpGen.setAction(Action);
3177   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3178 }
3179 
3180 /// Given an array of pointers to variables, project the address of a
3181 /// given variable.
3182 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3183                                       unsigned Index, const VarDecl *Var) {
3184   // Pull out the pointer to the variable.
3185   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3186   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3187 
3188   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3189   Addr = CGF.Builder.CreateElementBitCast(
3190       Addr, CGF.ConvertTypeForMem(Var->getType()));
3191   return Addr;
3192 }
3193 
/// Emits the helper function used by __kmpc_copyprivate:
///   void omp.copyprivate.copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of void* with one slot per copyprivate
/// variable; each destination element (from LHSArg) is assigned from the
/// matching source element (from RHSArg) via the given assignment
/// expression.
/// \param ArgsType Pointer type of the void*[n] argument arrays.
/// \param CopyprivateVars DeclRefExprs for the copyprivate variables.
/// \param DestExprs Placeholder decls bound to the LHS array slots.
/// \param SrcExprs Placeholder decls bound to the RHS array slots.
/// \param AssignmentOps Per-variable copy-assignment expressions, written
///        in terms of the Dest/Src placeholder decls.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // EmitOMPCopy rebinds DestVar/SrcVar to the projected addresses while
    // evaluating the assignment expression.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3247 
/// Emits a 'single' region, including copyprivate support:
///   int32 did_it = 0;
///   if (__kmpc_single(loc, gtid)) { <body>; did_it = 1;
///                                   __kmpc_end_single(loc, gtid); }
///   __kmpc_copyprivate(loc, gtid, size, list, copy_func, did_it);
/// The copyprivate call broadcasts the executing thread's values to the
/// other threads of the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is needed only when there are copyprivate clauses.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the single-thread branch)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are forwarded to the callee's
    // DestExprs/SrcExprs parameters in that order — the naming looks
    // swapped between caller and callee; verify which side each binds to.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3328 
3329 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3330                                         const RegionCodeGenTy &OrderedOpGen,
3331                                         SourceLocation Loc, bool IsThreads) {
3332   if (!CGF.HaveInsertPoint())
3333     return;
3334   // __kmpc_ordered(ident_t *, gtid);
3335   // OrderedOpGen();
3336   // __kmpc_end_ordered(ident_t *, gtid);
3337   // Prepare arguments and build a call to __kmpc_ordered
3338   if (IsThreads) {
3339     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3340     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3341                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3342                           Args);
3343     OrderedOpGen.setAction(Action);
3344     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3345     return;
3346   }
3347   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3348 }
3349 
3350 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3351   unsigned Flags;
3352   if (Kind == OMPD_for)
3353     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3354   else if (Kind == OMPD_sections)
3355     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3356   else if (Kind == OMPD_single)
3357     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3358   else if (Kind == OMPD_barrier)
3359     Flags = OMP_IDENT_BARRIER_EXPL;
3360   else
3361     Flags = OMP_IDENT_BARRIER_IMPL;
3362   return Flags;
3363 }
3364 
3365 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3366     CodeGenFunction &CGF, const OMPLoopDirective &S,
3367     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3368   // Check if the loop directive is actually a doacross loop directive. In this
3369   // case choose static, 1 schedule.
3370   if (llvm::any_of(
3371           S.getClausesOfKind<OMPOrderedClause>(),
3372           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3373     ScheduleKind = OMPC_SCHEDULE_static;
3374     // Chunk size is 1 in this case.
3375     llvm::APInt ChunkSize(32, 1);
3376     ChunkExpr = IntegerLiteral::Create(
3377         CGF.getContext(), ChunkSize,
3378         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3379         SourceLocation());
3380   }
3381 }
3382 
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Encode the directive that implies this barrier into the ident_t flags so
  // the runtime can distinguish implicit barriers from an explicit one.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id); both share the same (loc, gtid) argument list.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Inside a region that may be cancelled, the barrier must also observe
    // cancellation: use __kmpc_cancel_barrier, whose non-zero result means
    // the construct was cancelled.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // Branch through any active cleanup scopes to the cancellation
        // destination of the enclosing directive.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable (or forced-simple) case: plain __kmpc_barrier.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3420 
3421 /// Map the OpenMP loop schedule to the runtime enumeration.
3422 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3423                                           bool Chunked, bool Ordered) {
3424   switch (ScheduleKind) {
3425   case OMPC_SCHEDULE_static:
3426     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3427                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3428   case OMPC_SCHEDULE_dynamic:
3429     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3430   case OMPC_SCHEDULE_guided:
3431     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3432   case OMPC_SCHEDULE_runtime:
3433     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3434   case OMPC_SCHEDULE_auto:
3435     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3436   case OMPC_SCHEDULE_unknown:
3437     assert(!Chunked && "chunk was specified but schedule kind not known");
3438     return Ordered ? OMP_ord_static : OMP_sch_static;
3439   }
3440   llvm_unreachable("Unexpected runtime schedule");
3441 }
3442 
3443 /// Map the OpenMP distribute schedule to the runtime enumeration.
3444 static OpenMPSchedType
3445 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3446   // only static is allowed for dist_schedule
3447   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3448 }
3449 
3450 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3451                                          bool Chunked) const {
3452   OpenMPSchedType Schedule =
3453       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3454   return Schedule == OMP_sch_static;
3455 }
3456 
3457 bool CGOpenMPRuntime::isStaticNonchunked(
3458     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3459   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3460   return Schedule == OMP_dist_sch_static;
3461 }
3462 
3463 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3464                                       bool Chunked) const {
3465   OpenMPSchedType Schedule =
3466       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3467   return Schedule == OMP_sch_static_chunked;
3468 }
3469 
3470 bool CGOpenMPRuntime::isStaticChunked(
3471     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3472   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3473   return Schedule == OMP_dist_sch_static_chunked;
3474 }
3475 
3476 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3477   OpenMPSchedType Schedule =
3478       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3479   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3480   return Schedule != OMP_sch_static;
3481 }
3482 
3483 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3484                                   OpenMPScheduleClauseModifier M1,
3485                                   OpenMPScheduleClauseModifier M2) {
3486   int Modifier = 0;
3487   switch (M1) {
3488   case OMPC_SCHEDULE_MODIFIER_monotonic:
3489     Modifier = OMP_sch_modifier_monotonic;
3490     break;
3491   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3492     Modifier = OMP_sch_modifier_nonmonotonic;
3493     break;
3494   case OMPC_SCHEDULE_MODIFIER_simd:
3495     if (Schedule == OMP_sch_static_chunked)
3496       Schedule = OMP_sch_static_balanced_chunked;
3497     break;
3498   case OMPC_SCHEDULE_MODIFIER_last:
3499   case OMPC_SCHEDULE_MODIFIER_unknown:
3500     break;
3501   }
3502   switch (M2) {
3503   case OMPC_SCHEDULE_MODIFIER_monotonic:
3504     Modifier = OMP_sch_modifier_monotonic;
3505     break;
3506   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3507     Modifier = OMP_sch_modifier_nonmonotonic;
3508     break;
3509   case OMPC_SCHEDULE_MODIFIER_simd:
3510     if (Schedule == OMP_sch_static_chunked)
3511       Schedule = OMP_sch_static_balanced_chunked;
3512     break;
3513   case OMPC_SCHEDULE_MODIFIER_last:
3514   case OMPC_SCHEDULE_MODIFIER_unknown:
3515     break;
3516   }
3517   return Schedule | Modifier;
3518 }
3519 
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Dispatch init is only used for dynamic-style schedules; static schedules
  // go through emitForStaticInit instead (unless the loop is ordered).
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  // The runtime entry point is selected by IV width/signedness
  // (__kmpc_dispatch_init_4/4u/8/8u).
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3551 
/// Emit the actual __kmpc_for_static_init_* call for a statically scheduled
/// worksharing or distribute loop.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops never use the static-init entry point.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only legal for the non-chunked schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3600 
3601 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3602                                         SourceLocation Loc,
3603                                         OpenMPDirectiveKind DKind,
3604                                         const OpenMPScheduleTy &ScheduleKind,
3605                                         const StaticRTInput &Values) {
3606   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3607       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3608   assert(isOpenMPWorksharingDirective(DKind) &&
3609          "Expected loop-based or sections-based directive.");
3610   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3611                                              isOpenMPLoopDirective(DKind)
3612                                                  ? OMP_IDENT_WORK_LOOP
3613                                                  : OMP_IDENT_WORK_SECTIONS);
3614   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3615   llvm::FunctionCallee StaticInitFunction =
3616       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3617   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3618                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3619 }
3620 
3621 void CGOpenMPRuntime::emitDistributeStaticInit(
3622     CodeGenFunction &CGF, SourceLocation Loc,
3623     OpenMPDistScheduleClauseKind SchedKind,
3624     const CGOpenMPRuntime::StaticRTInput &Values) {
3625   OpenMPSchedType ScheduleNum =
3626       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3627   llvm::Value *UpdatedLocation =
3628       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3629   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3630   llvm::FunctionCallee StaticInitFunction =
3631       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3632   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3633                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3634                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3635 }
3636 
3637 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3638                                           SourceLocation Loc,
3639                                           OpenMPDirectiveKind DKind) {
3640   if (!CGF.HaveInsertPoint())
3641     return;
3642   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3643   llvm::Value *Args[] = {
3644       emitUpdateLocation(CGF, Loc,
3645                          isOpenMPDistributeDirective(DKind)
3646                              ? OMP_IDENT_WORK_DISTRIBUTE
3647                              : isOpenMPLoopDirective(DKind)
3648                                    ? OMP_IDENT_WORK_LOOP
3649                                    : OMP_IDENT_WORK_SECTIONS),
3650       getThreadID(CGF, Loc)};
3651   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3652                       Args);
3653 }
3654 
3655 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3656                                                  SourceLocation Loc,
3657                                                  unsigned IVSize,
3658                                                  bool IVSigned) {
3659   if (!CGF.HaveInsertPoint())
3660     return;
3661   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3662   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3663   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3664 }
3665 
3666 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3667                                           SourceLocation Loc, unsigned IVSize,
3668                                           bool IVSigned, Address IL,
3669                                           Address LB, Address UB,
3670                                           Address ST) {
3671   // Call __kmpc_dispatch_next(
3672   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3673   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3674   //          kmp_int[32|64] *p_stride);
3675   llvm::Value *Args[] = {
3676       emitUpdateLocation(CGF, Loc),
3677       getThreadID(CGF, Loc),
3678       IL.getPointer(), // &isLastIter
3679       LB.getPointer(), // &Lower
3680       UB.getPointer(), // &Upper
3681       ST.getPointer()  // &Stride
3682   };
3683   llvm::Value *Call =
3684       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3685   return CGF.EmitScalarConversion(
3686       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3687       CGF.getContext().BoolTy, Loc);
3688 }
3689 
3690 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3691                                            llvm::Value *NumThreads,
3692                                            SourceLocation Loc) {
3693   if (!CGF.HaveInsertPoint())
3694     return;
3695   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3696   llvm::Value *Args[] = {
3697       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3698       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3699   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3700                       Args);
3701 }
3702 
3703 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3704                                          OpenMPProcBindClauseKind ProcBind,
3705                                          SourceLocation Loc) {
3706   if (!CGF.HaveInsertPoint())
3707     return;
3708   // Constants for proc bind value accepted by the runtime.
3709   enum ProcBindTy {
3710     ProcBindFalse = 0,
3711     ProcBindTrue,
3712     ProcBindMaster,
3713     ProcBindClose,
3714     ProcBindSpread,
3715     ProcBindIntel,
3716     ProcBindDefault
3717   } RuntimeProcBind;
3718   switch (ProcBind) {
3719   case OMPC_PROC_BIND_master:
3720     RuntimeProcBind = ProcBindMaster;
3721     break;
3722   case OMPC_PROC_BIND_close:
3723     RuntimeProcBind = ProcBindClose;
3724     break;
3725   case OMPC_PROC_BIND_spread:
3726     RuntimeProcBind = ProcBindSpread;
3727     break;
3728   case OMPC_PROC_BIND_unknown:
3729     llvm_unreachable("Unsupported proc_bind value.");
3730   }
3731   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3732   llvm::Value *Args[] = {
3733       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3734       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3735   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3736 }
3737 
3738 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3739                                 SourceLocation Loc) {
3740   if (!CGF.HaveInsertPoint())
3741     return;
3742   // Build call void __kmpc_flush(ident_t *loc)
3743   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3744                       emitUpdateLocation(CGF, Loc));
3745 }
3746 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order defines the field indexes used when
/// addressing into the generated kmp_task_t record, so it must stay in sync
/// with the record layout built elsewhere in this file — do not reorder.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3772 
3773 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3774   return OffloadEntriesTargetRegion.empty() &&
3775          OffloadEntriesDeviceGlobalVar.empty();
3776 }
3777 
3778 /// Initialize target region entry.
3779 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3780     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3781                                     StringRef ParentName, unsigned LineNum,
3782                                     unsigned Order) {
3783   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3784                                              "only required for the device "
3785                                              "code generation.");
3786   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3787       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3788                                    OMPTargetRegionEntryTargetRegion);
3789   ++OffloadingEntriesNum;
3790 }
3791 
3792 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3793     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3794                                   StringRef ParentName, unsigned LineNum,
3795                                   llvm::Constant *Addr, llvm::Constant *ID,
3796                                   OMPTargetRegionEntryKind Flags) {
3797   // If we are emitting code for a target, the entry is already initialized,
3798   // only has to be registered.
3799   if (CGM.getLangOpts().OpenMPIsDevice) {
3800     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3801       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3802           DiagnosticsEngine::Error,
3803           "Unable to find target region on line '%0' in the device code.");
3804       CGM.getDiags().Report(DiagID) << LineNum;
3805       return;
3806     }
3807     auto &Entry =
3808         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3809     assert(Entry.isValid() && "Entry not initialized!");
3810     Entry.setAddress(Addr);
3811     Entry.setID(ID);
3812     Entry.setFlags(Flags);
3813   } else {
3814     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3815     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3816     ++OffloadingEntriesNum;
3817   }
3818 }
3819 
3820 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3821     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3822     unsigned LineNum) const {
3823   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3824   if (PerDevice == OffloadEntriesTargetRegion.end())
3825     return false;
3826   auto PerFile = PerDevice->second.find(FileID);
3827   if (PerFile == PerDevice->second.end())
3828     return false;
3829   auto PerParentName = PerFile->second.find(ParentName);
3830   if (PerParentName == PerFile->second.end())
3831     return false;
3832   auto PerLine = PerParentName->second.find(LineNum);
3833   if (PerLine == PerParentName->second.end())
3834     return false;
3835   // Fail if this entry is already registered.
3836   if (PerLine->second.getAddress() || PerLine->second.getID())
3837     return false;
3838   return true;
3839 }
3840 
3841 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3842     const OffloadTargetRegionEntryInfoActTy &Action) {
3843   // Scan all target region entries and perform the provided action.
3844   for (const auto &D : OffloadEntriesTargetRegion)
3845     for (const auto &F : D.second)
3846       for (const auto &P : F.second)
3847         for (const auto &L : P.second)
3848           Action(D.first, F.first, P.first(), L.first, L.second);
3849 }
3850 
3851 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3852     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3853                                        OMPTargetGlobalVarEntryKind Flags,
3854                                        unsigned Order) {
3855   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3856                                              "only required for the device "
3857                                              "code generation.");
3858   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3859   ++OffloadingEntriesNum;
3860 }
3861 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device: the entry was pre-initialized; complete it with address, size
    // and linkage. Note operator[] would create a fresh (invalid) entry if
    // initialization was missed — the assert below catches that case.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: only fill in size/linkage if they were deferred
      // (size recorded as zero).
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host: update an existing entry in place, or create a new one with the
    // next free order number.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3901 
3902 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3903     actOnDeviceGlobalVarEntriesInfo(
3904         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3905   // Scan all target region entries and perform the provided action.
3906   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3907     Action(E.getKey(), E.getValue());
3908 }
3909 
3910 llvm::Function *
3911 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3912   // If we don't have entries or if we are emitting code for the device, we
3913   // don't need to do anything.
3914   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3915     return nullptr;
3916 
3917   llvm::Module &M = CGM.getModule();
3918   ASTContext &C = CGM.getContext();
3919 
3920   // Get list of devices we care about
3921   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3922 
3923   // We should be creating an offloading descriptor only if there are devices
3924   // specified.
3925   assert(!Devices.empty() && "No OpenMP offloading devices??");
3926 
3927   // Create the external variables that will point to the begin and end of the
3928   // host entries section. These will be defined by the linker.
3929   llvm::Type *OffloadEntryTy =
3930       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3931   std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3932   auto *HostEntriesBegin = new llvm::GlobalVariable(
3933       M, OffloadEntryTy, /*isConstant=*/true,
3934       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3935       EntriesBeginName);
3936   std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3937   auto *HostEntriesEnd =
3938       new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3939                                llvm::GlobalValue::ExternalLinkage,
3940                                /*Initializer=*/nullptr, EntriesEndName);
3941 
3942   // Create all device images
3943   auto *DeviceImageTy = cast<llvm::StructType>(
3944       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3945   ConstantInitBuilder DeviceImagesBuilder(CGM);
3946   ConstantArrayBuilder DeviceImagesEntries =
3947       DeviceImagesBuilder.beginArray(DeviceImageTy);
3948 
3949   for (const llvm::Triple &Device : Devices) {
3950     StringRef T = Device.getTriple();
3951     std::string BeginName = getName({"omp_offloading", "img_start", ""});
3952     auto *ImgBegin = new llvm::GlobalVariable(
3953         M, CGM.Int8Ty, /*isConstant=*/true,
3954         llvm::GlobalValue::ExternalWeakLinkage,
3955         /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3956     std::string EndName = getName({"omp_offloading", "img_end", ""});
3957     auto *ImgEnd = new llvm::GlobalVariable(
3958         M, CGM.Int8Ty, /*isConstant=*/true,
3959         llvm::GlobalValue::ExternalWeakLinkage,
3960         /*Initializer=*/nullptr, Twine(EndName).concat(T));
3961 
3962     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3963                               HostEntriesEnd};
3964     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3965                                              DeviceImagesEntries);
3966   }
3967 
3968   // Create device images global array.
3969   std::string ImagesName = getName({"omp_offloading", "device_images"});
3970   llvm::GlobalVariable *DeviceImages =
3971       DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3972                                                 CGM.getPointerAlign(),
3973                                                 /*isConstant=*/true);
3974   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3975 
3976   // This is a Zero array to be used in the creation of the constant expressions
3977   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3978                              llvm::Constant::getNullValue(CGM.Int32Ty)};
3979 
3980   // Create the target region descriptor.
3981   llvm::Constant *Data[] = {
3982       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3983       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3984                                            DeviceImages, Index),
3985       HostEntriesBegin, HostEntriesEnd};
3986   std::string Descriptor = getName({"omp_offloading", "descriptor"});
3987   llvm::GlobalVariable *Desc = createGlobalStruct(
3988       CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3989 
3990   // Emit code to register or unregister the descriptor at execution
3991   // startup or closing, respectively.
3992 
3993   llvm::Function *UnRegFn;
3994   {
3995     FunctionArgList Args;
3996     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3997     Args.push_back(&DummyPtr);
3998 
3999     CodeGenFunction CGF(CGM);
4000     // Disable debug info for global (de-)initializer because they are not part
4001     // of some particular construct.
4002     CGF.disableDebugInfo();
4003     const auto &FI =
4004         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4005     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4006     std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
4007     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4008     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4009     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
4010                         Desc);
4011     CGF.FinishFunction();
4012   }
4013   llvm::Function *RegFn;
4014   {
4015     CodeGenFunction CGF(CGM);
4016     // Disable debug info for global (de-)initializer because they are not part
4017     // of some particular construct.
4018     CGF.disableDebugInfo();
4019     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4020     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4021 
4022     // Encode offload target triples into the registration function name. It
4023     // will serve as a comdat key for the registration/unregistration code for
4024     // this particular combination of offloading targets.
4025     SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4026     RegFnNameParts[0] = "omp_offloading";
4027     RegFnNameParts[1] = "descriptor_reg";
4028     llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4029                     [](const llvm::Triple &T) -> const std::string& {
4030                       return T.getTriple();
4031                     });
4032     llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4033     std::string Descriptor = getName(RegFnNameParts);
4034     RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4035     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4036     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
4037     // Create a variable to drive the registration and unregistration of the
4038     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4039     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4040                                   SourceLocation(), nullptr, C.CharTy,
4041                                   ImplicitParamDecl::Other);
4042     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4043     CGF.FinishFunction();
4044   }
4045   if (CGM.supportsCOMDAT()) {
4046     // It is sufficient to call registration function only once, so create a
4047     // COMDAT group for registration/unregistration functions and associated
4048     // data. That would reduce startup time and code size. Registration
4049     // function serves as a COMDAT group key.
4050     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4051     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4052     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4053     RegFn->setComdat(ComdatKey);
4054     UnRegFn->setComdat(ComdatKey);
4055     DeviceImages->setComdat(ComdatKey);
4056     Desc->setComdat(ComdatKey);
4057   }
4058   return RegFn;
4059 }
4060 
4061 void CGOpenMPRuntime::createOffloadEntry(
4062     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4063     llvm::GlobalValue::LinkageTypes Linkage) {
4064   StringRef Name = Addr->getName();
4065   llvm::Module &M = CGM.getModule();
4066   llvm::LLVMContext &C = M.getContext();
4067 
4068   // Create constant string with the name.
4069   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4070 
4071   std::string StringName = getName({"omp_offloading", "entry_name"});
4072   auto *Str = new llvm::GlobalVariable(
4073       M, StrPtrInit->getType(), /*isConstant=*/true,
4074       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4075   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4076 
4077   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4078                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4079                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4080                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4081                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4082   std::string EntryName = getName({"omp_offloading", "entry", ""});
4083   llvm::GlobalVariable *Entry = createGlobalStruct(
4084       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4085       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4086 
4087   // The entry has to be created in the section the linker expects it to be.
4088   std::string Section = getName({"omp_offloading", "entries"});
4089   Entry->setSection(Section);
4090 }
4091 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are placed at the index given by their creation order so that
  // they are later emitted in that same order.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual __tgt_offload_entry structures in creation order,
  // diagnosing entries that were registered but never got an address/ID.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      // Target region entries have no size; the ID doubles as the address on
      // the device side.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Skip 'to' entries on the device when unified shared memory is
        // required - no separate device copy is emitted in that mode.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only emitted on the host side.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4249 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a local context; only its named metadata is
  // inspected below, nothing is linked into the current module.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read an integer/string operand of the current node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands follow the layout
    // produced by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4318 
4319 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4320   if (!KmpRoutineEntryPtrTy) {
4321     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4322     ASTContext &C = CGM.getContext();
4323     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4324     FunctionProtoType::ExtProtoInfo EPI;
4325     KmpRoutineEntryPtrQTy = C.getPointerType(
4326         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4327     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4328   }
4329 }
4330 
4331 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4332   // Make sure the type of the entry is already created. This is the type we
4333   // have to create:
4334   // struct __tgt_offload_entry{
4335   //   void      *addr;       // Pointer to the offload entry info.
4336   //                          // (function or global)
4337   //   char      *name;       // Name of the function or global.
4338   //   size_t     size;       // Size of the entry info (0 if it a function).
4339   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4340   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4341   // };
4342   if (TgtOffloadEntryQTy.isNull()) {
4343     ASTContext &C = CGM.getContext();
4344     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4345     RD->startDefinition();
4346     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4347     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4348     addFieldToRecordDecl(C, RD, C.getSizeType());
4349     addFieldToRecordDecl(
4350         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4351     addFieldToRecordDecl(
4352         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4353     RD->completeDefinition();
4354     RD->addAttr(PackedAttr::CreateImplicit(C));
4355     TgtOffloadEntryQTy = C.getRecordType(RD);
4356   }
4357   return TgtOffloadEntryQTy;
4358 }
4359 
4360 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4361   // These are the types we need to build:
4362   // struct __tgt_device_image{
4363   // void   *ImageStart;       // Pointer to the target code start.
4364   // void   *ImageEnd;         // Pointer to the target code end.
4365   // // We also add the host entries to the device image, as it may be useful
4366   // // for the target runtime to have access to that information.
4367   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4368   //                                       // the entries.
4369   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4370   //                                       // entries (non inclusive).
4371   // };
4372   if (TgtDeviceImageQTy.isNull()) {
4373     ASTContext &C = CGM.getContext();
4374     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4375     RD->startDefinition();
4376     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4377     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4378     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4379     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4380     RD->completeDefinition();
4381     TgtDeviceImageQTy = C.getRecordType(RD);
4382   }
4383   return TgtDeviceImageQTy;
4384 }
4385 
4386 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4387   // struct __tgt_bin_desc{
4388   //   int32_t              NumDevices;      // Number of devices supported.
4389   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4390   //                                         // (one per device).
4391   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4392   //                                         // entries.
4393   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4394   //                                         // entries (non inclusive).
4395   // };
4396   if (TgtBinaryDescriptorQTy.isNull()) {
4397     ASTContext &C = CGM.getContext();
4398     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4399     RD->startDefinition();
4400     addFieldToRecordDecl(
4401         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4402     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4403     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4404     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4405     RD->completeDefinition();
4406     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4407   }
4408   return TgtBinaryDescriptorQTy;
4409 }
4410 
4411 namespace {
4412 struct PrivateHelpersTy {
4413   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4414                    const VarDecl *PrivateElemInit)
4415       : Original(Original), PrivateCopy(PrivateCopy),
4416         PrivateElemInit(PrivateElemInit) {}
4417   const VarDecl *Original;
4418   const VarDecl *PrivateCopy;
4419   const VarDecl *PrivateElemInit;
4420 };
4421 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4422 } // anonymous namespace
4423 
4424 static RecordDecl *
4425 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4426   if (!Privates.empty()) {
4427     ASTContext &C = CGM.getContext();
4428     // Build struct .kmp_privates_t. {
4429     //         /*  private vars  */
4430     //       };
4431     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4432     RD->startDefinition();
4433     for (const auto &Pair : Privates) {
4434       const VarDecl *VD = Pair.second.Original;
4435       QualType Type = VD->getType().getNonReferenceType();
4436       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4437       if (VD->hasAttrs()) {
4438         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4439              E(VD->getAttrs().end());
4440              I != E; ++I)
4441           FD->addAttr(*I);
4442       }
4443     }
4444     RD->completeDefinition();
4445     return RD;
4446   }
4447   return nullptr;
4448 }
4449 
4450 static RecordDecl *
4451 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4452                          QualType KmpInt32Ty,
4453                          QualType KmpRoutineEntryPointerQTy) {
4454   ASTContext &C = CGM.getContext();
4455   // Build struct kmp_task_t {
4456   //         void *              shareds;
4457   //         kmp_routine_entry_t routine;
4458   //         kmp_int32           part_id;
4459   //         kmp_cmplrdata_t data1;
4460   //         kmp_cmplrdata_t data2;
4461   // For taskloops additional fields:
4462   //         kmp_uint64          lb;
4463   //         kmp_uint64          ub;
4464   //         kmp_int64           st;
4465   //         kmp_int32           liter;
4466   //         void *              reductions;
4467   //       };
4468   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4469   UD->startDefinition();
4470   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4471   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4472   UD->completeDefinition();
4473   QualType KmpCmplrdataTy = C.getRecordType(UD);
4474   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4475   RD->startDefinition();
4476   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4477   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4478   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4479   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4480   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4481   if (isOpenMPTaskLoopDirective(Kind)) {
4482     QualType KmpUInt64Ty =
4483         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4484     QualType KmpInt64Ty =
4485         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4486     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4487     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4488     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4489     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4490     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4491   }
4492   RD->completeDefinition();
4493   return RD;
4494 }
4495 
4496 static RecordDecl *
4497 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4498                                      ArrayRef<PrivateDataTy> Privates) {
4499   ASTContext &C = CGM.getContext();
4500   // Build struct kmp_task_t_with_privates {
4501   //         kmp_task_t task_data;
4502   //         .kmp_privates_t. privates;
4503   //       };
4504   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4505   RD->startDefinition();
4506   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4507   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4508     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4509   RD->completeDefinition();
4510   return RD;
4511 }
4512 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: kmp_int32 (kmp_int32 gtid,
  //                             kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the embedded kmp_task_t (the first field of the wrapper record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the concrete shareds record type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record, if present, is the second field of the wrapper; it
  // is passed as 'void *', or as a null pointer when there are no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, loaded
    // from the corresponding kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4627 
/// Emit a function that runs the destructors for the private fields of a
/// kmp_task_t_with_privates instance. Its signature matches
/// kmp_routine_entry_t: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the privates record (the second field of the wrapper) and push a
  // destroy cleanup for every field whose type requires destruction.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4676 
4677 /// Emit a privates mapping function for correct handling of private and
4678 /// firstprivate variables.
4679 /// \code
4680 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4681 /// **noalias priv1,...,  <tyn> **noalias privn) {
4682 ///   *priv1 = &.privates.priv1;
4683 ///   ...;
4684 ///   *privn = &.privates.privn;
4685 /// }
4686 /// \endcode
4687 static llvm::Value *
4688 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4689                                ArrayRef<const Expr *> PrivateVars,
4690                                ArrayRef<const Expr *> FirstprivateVars,
4691                                ArrayRef<const Expr *> LastprivateVars,
4692                                QualType PrivatesQTy,
4693                                ArrayRef<PrivateDataTy> Privates) {
4694   ASTContext &C = CGM.getContext();
4695   FunctionArgList Args;
4696   ImplicitParamDecl TaskPrivatesArg(
4697       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4698       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4699       ImplicitParamDecl::Other);
4700   Args.push_back(&TaskPrivatesArg);
4701   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4702   unsigned Counter = 1;
4703   for (const Expr *E : PrivateVars) {
4704     Args.push_back(ImplicitParamDecl::Create(
4705         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4706         C.getPointerType(C.getPointerType(E->getType()))
4707             .withConst()
4708             .withRestrict(),
4709         ImplicitParamDecl::Other));
4710     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4711     PrivateVarsPos[VD] = Counter;
4712     ++Counter;
4713   }
4714   for (const Expr *E : FirstprivateVars) {
4715     Args.push_back(ImplicitParamDecl::Create(
4716         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4717         C.getPointerType(C.getPointerType(E->getType()))
4718             .withConst()
4719             .withRestrict(),
4720         ImplicitParamDecl::Other));
4721     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4722     PrivateVarsPos[VD] = Counter;
4723     ++Counter;
4724   }
4725   for (const Expr *E : LastprivateVars) {
4726     Args.push_back(ImplicitParamDecl::Create(
4727         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4728         C.getPointerType(C.getPointerType(E->getType()))
4729             .withConst()
4730             .withRestrict(),
4731         ImplicitParamDecl::Other));
4732     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4733     PrivateVarsPos[VD] = Counter;
4734     ++Counter;
4735   }
4736   const auto &TaskPrivatesMapFnInfo =
4737       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4738   llvm::FunctionType *TaskPrivatesMapTy =
4739       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4740   std::string Name =
4741       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4742   auto *TaskPrivatesMap = llvm::Function::Create(
4743       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4744       &CGM.getModule());
4745   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4746                                     TaskPrivatesMapFnInfo);
4747   if (CGM.getLangOpts().Optimize) {
4748     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4749     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4750     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4751   }
4752   CodeGenFunction CGF(CGM);
4753   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4754                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4755 
4756   // *privi = &.privates.privi;
4757   LValue Base = CGF.EmitLoadOfPointerLValue(
4758       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4759       TaskPrivatesArg.getType()->castAs<PointerType>());
4760   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4761   Counter = 0;
4762   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4763     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4764     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4765     LValue RefLVal =
4766         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4767     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4768         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4769     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4770     ++Counter;
4771   }
4772   CGF.FinishFunction();
4773   return TaskPrivatesMap;
4774 }
4775 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the captured shareds block used as the
/// source for firstprivate copies (may be Address::invalid()).
/// \param TDBase Base LValue of the kmp_task_t_with_privates object.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the
/// .privates. struct; its fields correspond one-to-one to \a Privates.
/// \param Privates Privatized variables, in field order of .privates..
/// \param ForDup true when emitting inside the task duplication routine; in
/// that mode only non-trivial constructor initializations are re-emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The .privates. struct is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the .privates. struct in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup routine (ForDup) only re-run initializers that are
    // non-trivial constructor calls; everything else was already set up.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a firstprivate copy: its initializer
      // reads from the corresponding shared (or artificial) variable.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          // Rebuild the LValue with the original declaration's alignment.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize Elem to the shared address,
          // then emit the initializer into the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: emit its own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4880 
4881 /// Check if duplication function is required for taskloops.
4882 static bool checkInitIsRequired(CodeGenFunction &CGF,
4883                                 ArrayRef<PrivateDataTy> Privates) {
4884   bool InitRequired = false;
4885   for (const PrivateDataTy &Pair : Privates) {
4886     const VarDecl *VD = Pair.second.PrivateCopy;
4887     const Expr *Init = VD->getAnyInitializer();
4888     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4889                                     !CGF.isTrivialInitializer(Init));
4890     if (InitRequired)
4891       break;
4892   }
4893   return InitRequired;
4894 }
4895 
4896 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true if the destination task's last-iteration field
/// must be initialized from the 'lastpriv' argument.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Base of the *destination* task object.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Read the shareds pointer from the *source* task; note this TDBase
    // intentionally shadows the destination base above.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // Re-run the non-trivial private initializations into the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4975 
4976 /// Checks if destructor function is required to be generated.
4977 /// \return true if cleanups are required, false otherwise.
4978 static bool
4979 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4980   bool NeedsCleanup = false;
4981   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4982   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4983   for (const FieldDecl *FD : PrivateRD->fields()) {
4984     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4985     if (NeedsCleanup)
4986       break;
4987   }
4988   return NeedsCleanup;
4989 }
4990 
/// Allocate a kmp_task_t object via the OpenMP runtime and fill in its
/// compiler-visible parts: the proxy task entry, the copied shareds, the
/// private copies (with an optional task_dup function for taskloops), the
/// destructor thunk and the priority. Returns the pieces the caller needs to
/// actually enqueue the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable-sort so the most strictly aligned copies come first.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloops use a wider record
  // than plain tasks/targets, so the two layouts are cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The task function takes the privates mapping function as its 4th
  // argument; match that parameter's type.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // 'final' may be a runtime expression (select at runtime) or a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause, use the target variant of the allocation call,
  // which additionally takes the device ID.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup function when lastprivates are
    // present or any private requires non-trivial initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5210 
5211 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5212                                    const OMPExecutableDirective &D,
5213                                    llvm::Function *TaskFunction,
5214                                    QualType SharedsTy, Address Shareds,
5215                                    const Expr *IfCond,
5216                                    const OMPTaskDataTy &Data) {
5217   if (!CGF.HaveInsertPoint())
5218     return;
5219 
5220   TaskResultTy Result =
5221       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5222   llvm::Value *NewTask = Result.NewTask;
5223   llvm::Function *TaskEntry = Result.TaskEntry;
5224   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5225   LValue TDBase = Result.TDBase;
5226   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5227   ASTContext &C = CGM.getContext();
5228   // Process list of dependences.
5229   Address DependenciesArray = Address::invalid();
5230   unsigned NumDependencies = Data.Dependences.size();
5231   if (NumDependencies) {
5232     // Dependence kind for RTL.
5233     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5234     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5235     RecordDecl *KmpDependInfoRD;
5236     QualType FlagsTy =
5237         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5238     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5239     if (KmpDependInfoTy.isNull()) {
5240       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5241       KmpDependInfoRD->startDefinition();
5242       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5243       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5244       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5245       KmpDependInfoRD->completeDefinition();
5246       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5247     } else {
5248       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5249     }
5250     // Define type kmp_depend_info[<Dependences.size()>];
5251     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5252         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5253         ArrayType::Normal, /*IndexTypeQuals=*/0);
5254     // kmp_depend_info[<Dependences.size()>] deps;
5255     DependenciesArray =
5256         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5257     for (unsigned I = 0; I < NumDependencies; ++I) {
5258       const Expr *E = Data.Dependences[I].second;
5259       LValue Addr = CGF.EmitLValue(E);
5260       llvm::Value *Size;
5261       QualType Ty = E->getType();
5262       if (const auto *ASE =
5263               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5264         LValue UpAddrLVal =
5265             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5266         llvm::Value *UpAddr =
5267             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5268         llvm::Value *LowIntPtr =
5269             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5270         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5271         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5272       } else {
5273         Size = CGF.getTypeSize(Ty);
5274       }
5275       LValue Base = CGF.MakeAddrLValue(
5276           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5277           KmpDependInfoTy);
5278       // deps[i].base_addr = &<Dependences[i].second>;
5279       LValue BaseAddrLVal = CGF.EmitLValueForField(
5280           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5281       CGF.EmitStoreOfScalar(
5282           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5283           BaseAddrLVal);
5284       // deps[i].len = sizeof(<Dependences[i].second>);
5285       LValue LenLVal = CGF.EmitLValueForField(
5286           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5287       CGF.EmitStoreOfScalar(Size, LenLVal);
5288       // deps[i].flags = <Dependences[i].first>;
5289       RTLDependenceKindTy DepKind;
5290       switch (Data.Dependences[I].first) {
5291       case OMPC_DEPEND_in:
5292         DepKind = DepIn;
5293         break;
5294       // Out and InOut dependencies must use the same code.
5295       case OMPC_DEPEND_out:
5296       case OMPC_DEPEND_inout:
5297         DepKind = DepInOut;
5298         break;
5299       case OMPC_DEPEND_mutexinoutset:
5300         DepKind = DepMutexInOutSet;
5301         break;
5302       case OMPC_DEPEND_source:
5303       case OMPC_DEPEND_sink:
5304       case OMPC_DEPEND_unknown:
5305         llvm_unreachable("Unknown task dependence type");
5306       }
5307       LValue FlagsLVal = CGF.EmitLValueForField(
5308           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5309       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5310                             FlagsLVal);
5311     }
5312     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5313         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5314   }
5315 
5316   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5317   // libcall.
5318   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5319   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5320   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5321   // list is not empty
5322   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5323   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5324   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5325   llvm::Value *DepTaskArgs[7];
5326   if (NumDependencies) {
5327     DepTaskArgs[0] = UpLoc;
5328     DepTaskArgs[1] = ThreadID;
5329     DepTaskArgs[2] = NewTask;
5330     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5331     DepTaskArgs[4] = DependenciesArray.getPointer();
5332     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5333     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5334   }
5335   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5336                         &TaskArgs,
5337                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5338     if (!Data.Tied) {
5339       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5340       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5341       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5342     }
5343     if (NumDependencies) {
5344       CGF.EmitRuntimeCall(
5345           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5346     } else {
5347       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5348                           TaskArgs);
5349     }
5350     // Check if parent region is untied and build return for untied task;
5351     if (auto *Region =
5352             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5353       Region->emitUntiedSwitch(CGF);
5354   };
5355 
5356   llvm::Value *DepWaitTaskArgs[6];
5357   if (NumDependencies) {
5358     DepWaitTaskArgs[0] = UpLoc;
5359     DepWaitTaskArgs[1] = ThreadID;
5360     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5361     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5362     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5363     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5364   }
5365   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5366                         NumDependencies, &DepWaitTaskArgs,
5367                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5368     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5369     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5370     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5371     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5372     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5373     // is specified.
5374     if (NumDependencies)
5375       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5376                           DepWaitTaskArgs);
5377     // Call proxy_task_entry(gtid, new_task);
5378     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5379                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5380       Action.Enter(CGF);
5381       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5382       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5383                                                           OutlinedFnArgs);
5384     };
5385 
5386     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5387     // kmp_task_t *new_task);
5388     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5389     // kmp_task_t *new_task);
5390     RegionCodeGenTy RCG(CodeGen);
5391     CommonActionTy Action(
5392         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5393         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5394     RCG.setAction(Action);
5395     RCG(CGF);
5396   };
5397 
5398   if (IfCond) {
5399     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5400   } else {
5401     RegionCodeGenTy ThenRCG(ThenCodeGen);
5402     ThenRCG(CGF);
5403   }
5404 }
5405 
/// Emits code for a 'taskloop' directive \p D: builds the task object via
/// emitTaskInit, initializes the lower-bound, upper-bound, stride and
/// reductions fields of the kmp_task_t record, and emits the
/// __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Nothing to emit in unreachable code.
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate and initialize the task object (task entry, shareds, etc.).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluated 'if' clause condition, or signed 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task record's lower bound field from the directive's
  // lower bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // And the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reduction data: zero-initialize the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: Data.Schedule's int flag selects num_tasks vs grainsize;
      // a null Schedule pointer means no schedule clause at all.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value, or 0 when no schedule was specified.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup function pointer (cast to void*), or null when absent.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5486 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on every iteration (used for atomic reduction emission).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Branch straight to DoneBB for a zero-length section.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element pointers across
  // loop iterations (second incoming value is added at the loop latch below).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // combiner emitted by RedOpGen operates on this element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5566 
5567 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5568 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5569 /// UDR combiner function.
5570 static void emitReductionCombiner(CodeGenFunction &CGF,
5571                                   const Expr *ReductionOp) {
5572   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5573     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5574       if (const auto *DRE =
5575               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5576         if (const auto *DRD =
5577                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5578           std::pair<llvm::Function *, llvm::Function *> Reduction =
5579               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5580           RValue Func = RValue::get(Reduction.first);
5581           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5582           CGF.EmitIgnoredExpr(ReductionOp);
5583           return;
5584         }
5585   CGF.EmitIgnoredExpr(ReductionOp);
5586 }
5587 
/// Emits the function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// where both arguments point to arrays of void* slots (type \p ArgsType) and
/// each reduction operation from \p ReductionOps is applied to the
/// corresponding LHS/RHS element pair. Extra slots after a variably-modified
/// private carry the VLA size. Returns the emitted function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map each LHS/RHS variable to its slot in the corresponding argument
  // array; Idx can run ahead of I when a private needs an extra VLA-size slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The next slot of the LHS array holds the element count; bind it to
      // the VLA size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; array-typed privates reduce element by element.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5679 
5680 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5681                                                   const Expr *ReductionOp,
5682                                                   const Expr *PrivateRef,
5683                                                   const DeclRefExpr *LHS,
5684                                                   const DeclRefExpr *RHS) {
5685   if (PrivateRef->getType()->isArrayType()) {
5686     // Emit reduction for array section.
5687     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5688     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5689     EmitOMPAggregateReduction(
5690         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5691         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5692           emitReductionCombiner(CGF, ReductionOp);
5693         });
5694   } else {
5695     // Emit reduction for array subscript or single variable.
5696     emitReductionCombiner(CGF, ReductionOp);
5697   }
5698 }
5699 
/// Emits reduction finalization for the given clauses: either the simple
/// inline combiners (SimpleReduction), or the full
/// __kmpc_reduce{_nowait}/switch pattern documented in the body below.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit in unreachable code.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Emit the plain combiners only; no runtime calls are needed.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  // Fill RedList with the addresses of the private (RHS) copies; Idx runs
  // ahead of I when an extra VLA-size slot is stored.
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic path: emit each combiner directly; the CommonActionTy below
  // adds the matching __kmpc_end_reduce{_nowait} call on region exit.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Atomic path: try to emit each combiner as a simple atomic update; fall
  // back to a critical region when the expression shape does not allow it.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Only plain assignments 'x = <update>' are candidates for the atomic
      // form; XExpr/UpExpr stay null otherwise.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits 'x BO= e' as an atomic update; the fallback lambda reloads x
        // into a temporary and re-evaluates the full update expression.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
6003 
6004 /// Generates unique name for artificial threadprivate variables.
6005 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6006 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6007                                       const Expr *Ref) {
6008   SmallString<256> Buffer;
6009   llvm::raw_svector_ostream Out(Buffer);
6010   const clang::DeclRefExpr *DE;
6011   const VarDecl *D = ::getBaseDecl(Ref, DE);
6012   if (!D)
6013     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6014   D = D->getCanonicalDecl();
6015   std::string Name = CGM.getOpenMPRuntime().getName(
6016       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6017   Out << Prefix << Name << "_"
6018       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6019   return Out.str();
6020 }
6021 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// Used as the per-item `reduce_init` callback of the kmp_task_red_input_t
/// record passed to the task reduction runtime.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Single `void *%arg` parameter: pointer to the private copy to initialize.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Internal linkage: the function is only referenced through the reduction
  // input record, never called directly from user code.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    // The original item's address was published through an artificial
    // threadprivate global by emitTaskReductionFixups; read it back here.
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: pass a null "original item" placeholder.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6088 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// Used as the per-item `reduce_comb` callback of the kmp_task_red_input_t
/// record passed to the task reduction runtime.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The LHS/RHS placeholder variables from the reduction clause; they are
  // remapped below onto the function's two void* arguments.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in/out item (combined result is stored here); %arg1: in item.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6166 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Used as the per-item `reduce_fini` callback of the kmp_task_red_input_t
/// record. Returns nullptr when the item's type needs no cleanups, in which
/// case the caller stores a null pointer in the record instead.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor work needed for this item — skip emitting the function.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single `void *%arg` parameter: pointer to the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
6215 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do without an insert point or without reduction items; the
  // null return tells callers there is no taskgroup reduction descriptor.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one kmp_task_red_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size-in-chars;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // The finalizer is optional; store a null pointer when no cleanups are
    // needed for this item's type.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 requests lazy (delayed) creation by the runtime.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6320 
6321 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6322                                               SourceLocation Loc,
6323                                               ReductionCodeGen &RCG,
6324                                               unsigned N) {
6325   auto Sizes = RCG.getSizes(N);
6326   // Emit threadprivate global variable if the type is non-constant
6327   // (Sizes.second = nullptr).
6328   if (Sizes.second) {
6329     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6330                                                      /*isSigned=*/false);
6331     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6332         CGF, CGM.getContext().getSizeType(),
6333         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6334     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6335   }
6336   // Store address of the original reduction item if custom initializer is used.
6337   if (RCG.usesReductionInitializer(N)) {
6338     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6339         CGF, CGM.getContext().VoidPtrTy,
6340         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6341     CGF.Builder.CreateStore(
6342         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6343             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6344         SharedAddr, /*IsVolatile=*/false);
6345   }
6346 }
6347 
6348 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6349                                               SourceLocation Loc,
6350                                               llvm::Value *ReductionsPtr,
6351                                               LValue SharedLVal) {
6352   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6353   // *d);
6354   llvm::Value *Args[] = {
6355       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6356                                 /*isSigned=*/true),
6357       ReductionsPtr,
6358       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6359                                                       CGM.VoidPtrTy)};
6360   return Address(
6361       CGF.EmitRuntimeCall(
6362           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6363       SharedLVal.getAlignment());
6364 }
6365 
6366 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6367                                        SourceLocation Loc) {
6368   if (!CGF.HaveInsertPoint())
6369     return;
6370   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6371   // global_tid);
6372   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6373   // Ignore return result until untied tasks are supported.
6374   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6375   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6376     Region->emitUntiedSwitch(CGF);
6377 }
6378 
6379 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6380                                            OpenMPDirectiveKind InnerKind,
6381                                            const RegionCodeGenTy &CodeGen,
6382                                            bool HasCancel) {
6383   if (!CGF.HaveInsertPoint())
6384     return;
6385   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6386   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6387 }
6388 
namespace {
/// Cancellation-kind codes understood by the OpenMP runtime's
/// __kmpc_cancel/__kmpc_cancellationpoint entry points. The numeric values
/// are part of the runtime ABI and must not change.
enum RTCancelKind {
  CancelNoreq = 0,      // no cancellation requested
  CancelParallel = 1,   // 'cancel parallel'
  CancelLoop = 2,       // 'cancel for'
  CancelSections = 3,   // 'cancel sections'
  CancelTaskgroup = 4   // 'cancel taskgroup'
};
} // anonymous namespace
6398 
6399 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6400   RTCancelKind CancelKind = CancelNoreq;
6401   if (CancelRegion == OMPD_parallel)
6402     CancelKind = CancelParallel;
6403   else if (CancelRegion == OMPD_for)
6404     CancelKind = CancelLoop;
6405   else if (CancelRegion == OMPD_sections)
6406     CancelKind = CancelSections;
6407   else {
6408     assert(CancelRegion == OMPD_taskgroup);
6409     CancelKind = CancelTaskgroup;
6410   }
6411   return CancelKind;
6412 }
6413 
6414 void CGOpenMPRuntime::emitCancellationPointCall(
6415     CodeGenFunction &CGF, SourceLocation Loc,
6416     OpenMPDirectiveKind CancelRegion) {
6417   if (!CGF.HaveInsertPoint())
6418     return;
6419   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6420   // global_tid, kmp_int32 cncl_kind);
6421   if (auto *OMPRegionInfo =
6422           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6423     // For 'cancellation point taskgroup', the task region info may not have a
6424     // cancel. This may instead happen in another adjacent task.
6425     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6426       llvm::Value *Args[] = {
6427           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6428           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6429       // Ignore return result until untied tasks are supported.
6430       llvm::Value *Result = CGF.EmitRuntimeCall(
6431           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6432       // if (__kmpc_cancellationpoint()) {
6433       //   exit from construct;
6434       // }
6435       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6436       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6437       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6438       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6439       CGF.EmitBlock(ExitBB);
6440       // exit from construct;
6441       CodeGenFunction::JumpDest CancelDest =
6442           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6443       CGF.EmitBranchThroughCleanup(CancelDest);
6444       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6445     }
6446   }
6447 }
6448 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual cancel emission; wrapped in a lambda so it can be guarded by
    // the 'if' clause condition below when one is present.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'cancel' with an 'if' clause: only cancel when the condition holds;
      // the else-branch generator is intentionally a no-op.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6490 
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Record that this module contains at least one target region, then
  // delegate the actual outlining to the shared helper.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6500 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target-region body into a function with the name
  // computed above; the RAII object installs the target-region codegen info
  // for the duration of the outlining.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a one-byte weak global whose address serves as the unique
    // region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6567 
6568 /// Checks if the expression is constant or does not have non-trivial function
6569 /// calls.
6570 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6571   // We can skip constant expressions.
6572   // We can skip expressions with trivial calls or simple expressions.
6573   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6574           !E->hasNonTrivialCall(Ctx)) &&
6575          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6576 }
6577 
// Returns the single "interesting" child statement of \p Body after peeling
// away containers and statements that cannot affect codegen (trivial
// expressions, asm/null statements, selected standalone OpenMP directives,
// and trivial declarations). Returns nullptr when there is no such child or
// more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the current candidate is itself a compound
  // statement: find its single significant child, if any.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A decl statement is ignorable only if every declaration in it is
        // either non-variable (types, pragmas, usings, OpenMP decls) or a
        // variable whose type and initializer are trivial.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6622 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': inspect the single construct (if any) nested
    // directly inside it to find out how many teams it implies.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression in the context of the
          // enclosing captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams construct without a num_teams clause: emit 0
        // (presumably "let the runtime pick a default" - confirm against the
        // offloading runtime interface).
        return Bld.getInt32(0);
      }
      // A parallel or simd construct nested directly in 'target' implies a
      // single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested construct was identified - the number of teams cannot
    // be determined on the host.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives carry the num_teams clause directly.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved - exactly one team.
    return Bld.getInt32(1);
  // The remaining kinds are not target-based executable directives; they are
  // rejected by the assertion at the top of the function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6743 
/// Computes the number of threads implied by a parallel or simd region
/// nested directly inside the captured statement \p CS, clamping an explicit
/// num_threads value by \p DefaultThreadLimitVal when the latter is non-null.
/// Returns \p DefaultThreadLimitVal (or 0 if it is null) when no parallel
/// region is found, and 1 for simd-only regions.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to 'parallel' (or carries no name
        // modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to false at compile time: the parallel region
            // runs with a single thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause needs before
            // evaluating the condition at run time.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Variable marked as no-init: allocate it without emitting
                  // an initializer, but still register its cleanups.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations, as for the if clause.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp the explicit num_threads by the default thread limit:
        // unsigned min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the default limit, or 0.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // Simd regions execute with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6835 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': derive the thread count from whatever is nested
    // directly inside it.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause on the nested directive, if any,
      // evaluating it in the context of the enclosing captured statement.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations first.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams construct that is not itself a distribute, descend one
      // level to analyze what is nested inside the teams region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute: look for a nested parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // Simd-only regions run with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause lives on the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested plain 'distribute': look for a parallel region inside it.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the if clause that applies to 'parallel' (or carries no name
      // modifier).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition folds to false: a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine with thread_limit: unsigned min(NumThreads, ThreadLimit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply the runtime if-clause condition: <cond> ? limit : 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only target regions run with a single thread.
    return Bld.getInt32(1);
  // The remaining kinds are not target-based executable directives; they are
  // rejected by the assertion at the top of the function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7048 
7049 namespace {
7050 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7051 
7052 // Utility to handle information from clauses associated with a given
7053 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7054 // It provides a convenient interface to obtain the information and generate
7055 // code for that information.
7056 class MappableExprsHandler {
7057 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these bit values are presumably mirrored by the offloading
  /// runtime library's map-type flags - confirm both stay in sync when
  /// changing them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7095 
7096   /// Class that associates information with a base pointer to be passed to the
7097   /// runtime library.
7098   class BasePointerInfo {
7099     /// The base pointer.
7100     llvm::Value *Ptr = nullptr;
7101     /// The base declaration that refers to this device pointer, or null if
7102     /// there is none.
7103     const ValueDecl *DevPtrDecl = nullptr;
7104 
7105   public:
7106     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7107         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7108     llvm::Value *operator*() const { return Ptr; }
7109     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7110     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7111   };
7112 
7113   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7114   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7115   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7116 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// (FieldIndex, Pointer) of the lowest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// (FieldIndex, Pointer) of the highest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
  };
7128 
7129 private:
  /// Information gathered from a single map-like clause for one component
  /// list: the mappable expression components, the map type and its
  /// modifiers, whether the runtime has to return the device pointer for
  /// this entry, and whether the map was generated implicitly.
  /// (The previous comment here described device-pointer return kinds and
  /// did not match this struct.)
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7147 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression of the deferred entry.
    const Expr *IE = nullptr;
    /// The declaration the use_device_ptr clause names.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7158 
7159   /// Directive from where the map clauses were extracted.
7160   const OMPExecutableDirective &CurDir;
7161 
7162   /// Function the directive is being generated for.
7163   CodeGenFunction &CGF;
7164 
7165   /// Set of all first private variables in the current directive.
7166   /// bool data is set to true if the variable is implicitly marked as
7167   /// firstprivate, false otherwise.
7168   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7169 
7170   /// Map between device pointer declarations and their expression components.
7171   /// The key value for declarations in 'this' is null.
7172   llvm::DenseMap<
7173       const ValueDecl *,
7174       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7175       DevPointersMap;
7176 
  /// Computes the size in bytes (as an llvm::Value) of the entity denoted by
  /// \p E. OpenMP array sections are sized from the section length rather
  /// than from the expression's type; reference types are sized by their
  /// pointee.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // Section size = length * element size (non-overflowing multiply).
      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }
7218 
7219   /// Return the corresponding bits for a given map clause modifier. Add
7220   /// a flag marking the map as a pointer if requested. Add a flag marking the
7221   /// map as the first one of a series of maps that relate to the same map
7222   /// expression.
7223   OpenMPOffloadMappingFlags getMapTypeBits(
7224       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7226     OpenMPOffloadMappingFlags Bits =
7227         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7228     switch (MapType) {
7229     case OMPC_MAP_alloc:
7230     case OMPC_MAP_release:
7231       // alloc and release is the default behavior in the runtime library,  i.e.
7232       // if we don't pass any bits alloc/release that is what the runtime is
7233       // going to do. Therefore, we don't need to signal anything for these two
7234       // type modifiers.
7235       break;
7236     case OMPC_MAP_to:
7237       Bits |= OMP_MAP_TO;
7238       break;
7239     case OMPC_MAP_from:
7240       Bits |= OMP_MAP_FROM;
7241       break;
7242     case OMPC_MAP_tofrom:
7243       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7244       break;
7245     case OMPC_MAP_delete:
7246       Bits |= OMP_MAP_DELETE;
7247       break;
7248     case OMPC_MAP_unknown:
7249       llvm_unreachable("Unexpected map type!");
7250     }
7251     if (AddPtrFlag)
7252       Bits |= OMP_MAP_PTR_AND_OBJ;
7253     if (AddIsTargetParamFlag)
7254       Bits |= OMP_MAP_TARGET_PARAM;
7255     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7256         != MapModifiers.end())
7257       Bits |= OMP_MAP_ALWAYS;
7258     return Bits;
7259   }
7260 
7261   /// Return true if the provided expression is a final array section. A
7262   /// final array section, is one whose length can't be proved to be one.
7263   bool isFinalArraySectionExpression(const Expr *E) const {
7264     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7265 
7266     // It is not an array section and therefore not a unity-size one.
7267     if (!OASE)
7268       return false;
7269 
7270     // An array section with no colon always refer to a single element.
7271     if (OASE->getColonLoc().isInvalid())
7272       return false;
7273 
7274     const Expr *Length = OASE->getLength();
7275 
7276     // If we don't have a length we have to check if the array has size 1
7277     // for this dimension. Also, we should always expect a length if the
7278     // base type is pointer.
7279     if (!Length) {
7280       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7281                              OASE->getBase()->IgnoreParenImpCasts())
7282                              .getCanonicalType();
7283       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7284         return ATy->getSize().getSExtValue() != 1;
7285       // If we don't have a constant dimension length, we have to consider
7286       // the current section as having any size, so it is not necessarily
7287       // unitary. If it happen to be unity size, that's user fault.
7288       return true;
7289     }
7290 
7291     // Check if the length evaluates to 1.
7292     Expr::EvalResult Result;
7293     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7294       return true; // Can have more that size 1.
7295 
7296     llvm::APSInt ConstLength = Result.Val.getInt();
7297     return ConstLength.getSExtValue() != 1;
7298   }
7299 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// \param MapType The map type of the originating clause (to/from/tofrom...).
  /// \param MapModifiers The map-type modifiers of the originating clause.
  /// \param Components The component list, ordered from the whole expression
  ///        down to its base (it is scanned here in reverse, base first).
  /// \param BasePointers,Pointers,Sizes,Types Output arrays; one entry is
  ///        appended per generated map argument.
  /// \param PartialStruct Updated with the base and lowest/highest mapped
  ///        members when a struct is only partially mapped.
  /// \param IsImplicit True if the originating map clause was implicit.
  /// \param OverlappedElements Component lists of members that overlap this
  ///        one; when non-empty, only the non-overlapped gaps are emitted.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array subscript/section whose base is 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            // Use the runtime-managed reference for the declare target
            // variable instead of the host address.
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // The lower bound (section pointer) of the entry being generated.
        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB is the address of the last byte of the element.
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                // The gap to copy ends where the overlapped component starts.
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // The next gap starts right after the overlapped component.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing gap, from the last overlapped component to the
          // end of the element.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7736 
7737   /// Return the adjusted map modifiers if the declaration a capture refers to
7738   /// appears in a first-private clause. This is expected to be used only with
7739   /// directives that start with 'target'.
7740   MappableExprsHandler::OpenMPOffloadMappingFlags
7741   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7742     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7743 
7744     // A first private variable captured by reference will use only the
7745     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7746     // declaration is known as first-private in this handler.
7747     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7748       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7749           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7750         return MappableExprsHandler::OMP_MAP_ALWAYS |
7751                MappableExprsHandler::OMP_MAP_TO;
7752       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7753         return MappableExprsHandler::OMP_MAP_TO |
7754                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7755       return MappableExprsHandler::OMP_MAP_PRIVATE |
7756              MappableExprsHandler::OMP_MAP_TO;
7757     }
7758     return MappableExprsHandler::OMP_MAP_TO |
7759            MappableExprsHandler::OMP_MAP_FROM;
7760   }
7761 
7762   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7763     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7764     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7765                                                   << 48);
7766   }
7767 
7768   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7769                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7770     // If the entry is PTR_AND_OBJ but has not been marked with the special
7771     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7772     // marked as MEMBER_OF.
7773     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7774         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7775       return;
7776 
7777     // Reset the placeholder value to prepare the flag for the assignment of the
7778     // proper MEMBER_OF value.
7779     Flags &= ~OMP_MAP_MEMBER_OF;
7780     Flags |= MemberOfFlag;
7781   }
7782 
7783   void getPlainLayout(const CXXRecordDecl *RD,
7784                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7785                       bool AsBase) const {
7786     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7787 
7788     llvm::StructType *St =
7789         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7790 
7791     unsigned NumElements = St->getNumElements();
7792     llvm::SmallVector<
7793         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7794         RecordLayout(NumElements);
7795 
7796     // Fill bases.
7797     for (const auto &I : RD->bases()) {
7798       if (I.isVirtual())
7799         continue;
7800       const auto *Base = I.getType()->getAsCXXRecordDecl();
7801       // Ignore empty bases.
7802       if (Base->isEmpty() || CGF.getContext()
7803                                  .getASTRecordLayout(Base)
7804                                  .getNonVirtualSize()
7805                                  .isZero())
7806         continue;
7807 
7808       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7809       RecordLayout[FieldIndex] = Base;
7810     }
7811     // Fill in virtual bases.
7812     for (const auto &I : RD->vbases()) {
7813       const auto *Base = I.getType()->getAsCXXRecordDecl();
7814       // Ignore empty bases.
7815       if (Base->isEmpty())
7816         continue;
7817       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7818       if (RecordLayout[FieldIndex])
7819         continue;
7820       RecordLayout[FieldIndex] = Base;
7821     }
7822     // Fill in all the fields.
7823     assert(!RD->isUnion() && "Unexpected union.");
7824     for (const auto *Field : RD->fields()) {
7825       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7826       // will fill in later.)
7827       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7828         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7829         RecordLayout[FieldIndex] = Field;
7830       }
7831     }
7832     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7833              &Data : RecordLayout) {
7834       if (Data.isNull())
7835         continue;
7836       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7837         getPlainLayout(Base, Layout, /*AsBase=*/true);
7838       else
7839         Layout.push_back(Data.get<const FieldDecl *>());
7840     }
7841   }
7842 
7843 public:
7844   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7845       : CurDir(Dir), CGF(CGF) {
7846     // Extract firstprivate clause information.
7847     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7848       for (const auto *D : C->varlists())
7849         FirstPrivateDecls.try_emplace(
7850             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7851     // Extract device pointer clause information.
7852     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7853       for (auto L : C->component_lists())
7854         DevPointersMap[L.first].push_back(L.second);
7855   }
7856 
7857   /// Generate code for the combined entry if we have a partially mapped struct
7858   /// and take care of the mapping flags of the arguments corresponding to
7859   /// individual struct members.
7860   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7861                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7862                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7863                          const StructRangeInfoTy &PartialStruct) const {
7864     // Base is the base of the struct
7865     BasePointers.push_back(PartialStruct.Base.getPointer());
7866     // Pointer is the address of the lowest element
7867     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7868     Pointers.push_back(LB);
7869     // Size is (addr of {highest+1} element) - (addr of lowest element)
7870     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7871     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7872     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7873     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7874     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7875     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7876                                                   /*isSigned=*/false);
7877     Sizes.push_back(Size);
7878     // Map type is always TARGET_PARAM
7879     Types.push_back(OMP_MAP_TARGET_PARAM);
7880     // Remove TARGET_PARAM flag from the first element
7881     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7882 
7883     // All other current entries will be MEMBER_OF the combined entry
7884     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7885     // 0xFFFF in the MEMBER_OF field).
7886     OpenMPOffloadMappingFlags MemberOfFlag =
7887         getMemberOfFlag(BasePointers.size() - 1);
7888     for (auto &M : CurTypes)
7889       setCorrectMemberOfFlag(M, MemberOfFlag);
7890   }
7891 
7892   /// Generate all the base pointers, section pointers, sizes and map
7893   /// types for the extracted mappable expressions. Also, for each item that
7894   /// relates with a device pointer, a pair of the relevant declaration and
7895   /// index where it occurs is appended to the device pointers info array.
7896   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7897                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7898                        MapFlagsArrayTy &Types) const {
7899     // We have to process the component lists that relate with the same
7900     // declaration in a single chunk so that we can generate the map flags
7901     // correctly. Therefore, we organize all lists in a map.
7902     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7903 
7904     // Helper function to fill the information map for the different supported
7905     // clauses.
7906     auto &&InfoGen = [&Info](
7907         const ValueDecl *D,
7908         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7909         OpenMPMapClauseKind MapType,
7910         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7911         bool ReturnDevicePointer, bool IsImplicit) {
7912       const ValueDecl *VD =
7913           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7914       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7915                             IsImplicit);
7916     };
7917 
7918     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7919     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7920       for (const auto &L : C->component_lists()) {
7921         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7922             /*ReturnDevicePointer=*/false, C->isImplicit());
7923       }
7924     for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7925       for (const auto &L : C->component_lists()) {
7926         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7927             /*ReturnDevicePointer=*/false, C->isImplicit());
7928       }
7929     for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7930       for (const auto &L : C->component_lists()) {
7931         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7932             /*ReturnDevicePointer=*/false, C->isImplicit());
7933       }
7934 
7935     // Look at the use_device_ptr clause information and mark the existing map
7936     // entries as such. If there is no map information for an entry in the
7937     // use_device_ptr list, we create one with map type 'alloc' and zero size
7938     // section. It is the user fault if that was not mapped before. If there is
7939     // no map information and the pointer is a struct member, then we defer the
7940     // emission of that entry until the whole struct has been processed.
7941     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7942         DeferredInfo;
7943 
7944     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7945     for (const auto *C :
7946         this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
7947       for (const auto &L : C->component_lists()) {
7948         assert(!L.second.empty() && "Not expecting empty list of components!");
7949         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7950         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7951         const Expr *IE = L.second.back().getAssociatedExpression();
7952         // If the first component is a member expression, we have to look into
7953         // 'this', which maps to null in the map of map information. Otherwise
7954         // look directly for the information.
7955         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7956 
7957         // We potentially have map information for this declaration already.
7958         // Look for the first set of components that refer to it.
7959         if (It != Info.end()) {
7960           auto CI = std::find_if(
7961               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7962                 return MI.Components.back().getAssociatedDeclaration() == VD;
7963               });
7964           // If we found a map entry, signal that the pointer has to be returned
7965           // and move on to the next declaration.
7966           if (CI != It->second.end()) {
7967             CI->ReturnDevicePointer = true;
7968             continue;
7969           }
7970         }
7971 
7972         // We didn't find any match in our map information - generate a zero
7973         // size array section - if the pointer is a struct member we defer this
7974         // action until the whole struct has been processed.
7975         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7976         if (isa<MemberExpr>(IE)) {
7977           // Insert the pointer into Info to be processed by
7978           // generateInfoForComponentList. Because it is a member pointer
7979           // without a pointee, no entry will be generated for it, therefore
7980           // we need to generate one after the whole struct has been processed.
7981           // Nonetheless, generateInfoForComponentList must be called to take
7982           // the pointer into account for the calculation of the range of the
7983           // partial struct.
7984           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7985                   /*ReturnDevicePointer=*/false, C->isImplicit());
7986           DeferredInfo[nullptr].emplace_back(IE, VD);
7987         } else {
7988           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7989               this->CGF.EmitLValue(IE), IE->getExprLoc());
7990           BasePointers.emplace_back(Ptr, VD);
7991           Pointers.push_back(Ptr);
7992           Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
7993           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
7994         }
7995       }
7996     }
7997 
7998     for (const auto &M : Info) {
7999       // We need to know when we generate information for the first component
8000       // associated with a capture, because the mapping flags depend on it.
8001       bool IsFirstComponentList = true;
8002 
8003       // Temporary versions of arrays
8004       MapBaseValuesArrayTy CurBasePointers;
8005       MapValuesArrayTy CurPointers;
8006       MapValuesArrayTy CurSizes;
8007       MapFlagsArrayTy CurTypes;
8008       StructRangeInfoTy PartialStruct;
8009 
8010       for (const MapInfo &L : M.second) {
8011         assert(!L.Components.empty() &&
8012                "Not expecting declaration with no component lists.");
8013 
8014         // Remember the current base pointer index.
8015         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8016         // FIXME: MSVC 2013 seems to require this-> to find the member method.
8017         this->generateInfoForComponentList(
8018             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8019             CurPointers, CurSizes, CurTypes, PartialStruct,
8020             IsFirstComponentList, L.IsImplicit);
8021 
8022         // If this entry relates with a device pointer, set the relevant
8023         // declaration and add the 'return pointer' flag.
8024         if (L.ReturnDevicePointer) {
8025           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8026                  "Unexpected number of mapped base pointers.");
8027 
8028           const ValueDecl *RelevantVD =
8029               L.Components.back().getAssociatedDeclaration();
8030           assert(RelevantVD &&
8031                  "No relevant declaration related with device pointer??");
8032 
8033           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8034           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8035         }
8036         IsFirstComponentList = false;
8037       }
8038 
8039       // Append any pending zero-length pointers which are struct members and
8040       // used with use_device_ptr.
8041       auto CI = DeferredInfo.find(M.first);
8042       if (CI != DeferredInfo.end()) {
8043         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8044           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8045           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8046               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8047           CurBasePointers.emplace_back(BasePtr, L.VD);
8048           CurPointers.push_back(Ptr);
8049           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8050           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8051           // value MEMBER_OF=FFFF so that the entry is later updated with the
8052           // correct value of MEMBER_OF.
8053           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8054                              OMP_MAP_MEMBER_OF);
8055         }
8056       }
8057 
8058       // If there is an entry in PartialStruct it means we have a struct with
8059       // individual members mapped. Emit an extra combined entry.
8060       if (PartialStruct.Base.isValid())
8061         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8062                           PartialStruct);
8063 
8064       // We need to append the results of this capture to what we already have.
8065       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8066       Pointers.append(CurPointers.begin(), CurPointers.end());
8067       Sizes.append(CurSizes.begin(), CurSizes.end());
8068       Types.append(CurTypes.begin(), CurTypes.end());
8069     }
8070   }
8071 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \a VD (whose runtime value is \a Arg) is a lambda object, emit one
  /// map entry for the captured 'this' (if any) and one entry per captured
  /// variable that is captured by reference or is a pointer captured by
  /// copy. Each emitted entry also records in \a LambdaPointers the address
  /// of the capture field keyed to the address of the enclosing lambda
  /// object, so that the placeholder MEMBER_OF bits can be patched later by
  /// adjustMemberOfForLambdaCaptures. Does nothing if \a VD is not a lambda.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda records are of interest here.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    // Treat the incoming argument as the address of the lambda object.
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the field backing each lambda capture.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this': the base is the capture field inside the
      // lambda, the pointee is the stored 'this' pointer, and the size is
      // that of a pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // OMP_MAP_MEMBER_OF here is a placeholder value; the real member-of
      // index is set later (see adjustMemberOfForLambdaCaptures).
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Skip by-copy captures unless they hold a pointer.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced storage using the size of
        // the referenced type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarLValVal.getPointer());
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by copy: map the loaded pointer value with zero
        // size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      // Same placeholder flag combination as for the captured 'this'; it is
      // patched later by adjustMemberOfForLambdaCaptures.
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8132 
8133   /// Set correct indices for lambdas captures.
8134   void adjustMemberOfForLambdaCaptures(
8135       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8136       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8137       MapFlagsArrayTy &Types) const {
8138     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8139       // Set correct member_of idx for all implicit lambda captures.
8140       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8141                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8142         continue;
8143       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8144       assert(BasePtr && "Unable to find base lambda address.");
8145       int TgtIdx = -1;
8146       for (unsigned J = I; J > 0; --J) {
8147         unsigned Idx = J - 1;
8148         if (Pointers[Idx] != BasePtr)
8149           continue;
8150         TgtIdx = Idx;
8151         break;
8152       }
8153       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8154       // All other current entries will be MEMBER_OF the combined entry
8155       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8156       // 0xFFFF in the MEMBER_OF field).
8157       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8158       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8159     }
8160   }
8161 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// \param Cap the capture to emit map information for; must not capture a
  ///        variable array type.
  /// \param Arg the runtime value of the capture.
  /// Results are appended to \a BasePointers, \a Pointers, \a Sizes and
  /// \a Types. \a PartialStruct is filled in when individual members of a
  /// struct are mapped, so the caller can emit an extra combined entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The captured declaration; 'this' captures are represented by a null
    // declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every map-clause component list that refers to this
    // declaration, together with the clause's map type, modifiers and
    // implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Component lists are compared back-to-front (i.e. starting at the base
    // declaration); if one list turns out to be a prefix of the other, the
    // shorter (prefix) list is the base and the longer list's components are
    // recorded as overlapped data of that base.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L only against the lists that follow it, so each pair is
      // visited once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only L1's component list is needed here; the remaining tuple
        // fields overwrite L's copies but are not used below.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // The field declaration order of the record is used as the sort key, so
    // the plain layout is computed once up front.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Walk both lists from the base until they diverge.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the diverging fields: same
            // parent record -> field index; different records -> whichever
            // field appears first in the precomputed layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Emit the map information for the component lists that have overlapped
    // elements first, passing the (sorted) overlapped components along so the
    // emitter can account for them.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8337 
8338   /// Generate the base pointers, section pointers, sizes and map types
8339   /// associated with the declare target link variables.
8340   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8341                                         MapValuesArrayTy &Pointers,
8342                                         MapValuesArrayTy &Sizes,
8343                                         MapFlagsArrayTy &Types) const {
8344     // Map other list items in the map clause which are not captured variables
8345     // but "declare target link" global variables.
8346     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8347       for (const auto &L : C->component_lists()) {
8348         if (!L.first)
8349           continue;
8350         const auto *VD = dyn_cast<VarDecl>(L.first);
8351         if (!VD)
8352           continue;
8353         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8354             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8355         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8356             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8357           continue;
8358         StructRangeInfoTy PartialStruct;
8359         generateInfoForComponentList(
8360             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8361             Pointers, Sizes, Types, PartialStruct,
8362             /*IsFirstComponentList=*/true, C->isImplicit());
8363         assert(!PartialStruct.Base.isValid() &&
8364                "No partial structs for declare target link expected.");
8365       }
8366     }
8367   }
8368 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of \a CurBasePointers,
  /// \a CurPointers, \a CurSizes and \a CurMapTypes; the entry is always a
  /// target parameter and is flagged implicit unless the declaration appears
  /// in FirstPrivateDecls with an explicit marking.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointed-to object with the size of the
      // pointee type and the default tofrom map type.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause on the directive decides whether this entry is
      // implicit.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // By-reference capture: the size is that of the referenced type.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variable: register a global copy on the
        // device and initialize it from the original value here; the global
        // is then used as both base pointer and pointer.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: map the pointer
          // value itself, so load it through the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8452 };
8453 } // anonymous namespace
8454 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills \a Info with: the number of pointers, function-local temporaries
/// for the base-pointer and pointer arrays (populated below), a sizes array
/// (a constant global when all sizes are compile-time constants, otherwise a
/// temporary filled at runtime), and a constant global holding the map
/// types. Device-pointer addresses are recorded in
/// Info.CaptureDeviceAddrMap when requested.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and pointers always live in function-local temporaries;
    // they are stored into element-by-element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType =
          Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
                                   /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer and pointer temporaries (and, when needed, the
    // runtime-evaluated sizes) one element at a time.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the address of entries associated with a device-pointer
      // declaration so use_device_ptr privates can be initialized from it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8573 /// Emit the arguments to be passed to the runtime library based on the
8574 /// arrays of pointers, sizes and map types.
8575 static void emitOffloadingArraysArgument(
8576     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8577     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8578     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8579   CodeGenModule &CGM = CGF.CGM;
8580   if (Info.NumberOfPtrs) {
8581     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8582         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8583         Info.BasePointersArray,
8584         /*Idx0=*/0, /*Idx1=*/0);
8585     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8586         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8587         Info.PointersArray,
8588         /*Idx0=*/0,
8589         /*Idx1=*/0);
8590     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8591         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8592         /*Idx0=*/0, /*Idx1=*/0);
8593     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8594         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8595         Info.MapTypesArray,
8596         /*Idx0=*/0,
8597         /*Idx1=*/0);
8598   } else {
8599     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8600     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8601     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8602     MapTypesArrayArg =
8603         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8604   }
8605 }
8606 
8607 /// Check for inner distribute directive.
8608 static const OMPExecutableDirective *
8609 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8610   const auto *CS = D.getInnermostCapturedStmt();
8611   const auto *Body =
8612       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8613   const Stmt *ChildStmt =
8614       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8615 
8616   if (const auto *NestedDir =
8617           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8618     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8619     switch (D.getDirectiveKind()) {
8620     case OMPD_target:
8621       if (isOpenMPDistributeDirective(DKind))
8622         return NestedDir;
8623       if (DKind == OMPD_teams) {
8624         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8625             /*IgnoreCaptured=*/true);
8626         if (!Body)
8627           return nullptr;
8628         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8629         if (const auto *NND =
8630                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8631           DKind = NND->getDirectiveKind();
8632           if (isOpenMPDistributeDirective(DKind))
8633             return NND;
8634         }
8635       }
8636       return nullptr;
8637     case OMPD_target_teams:
8638       if (isOpenMPDistributeDirective(DKind))
8639         return NestedDir;
8640       return nullptr;
8641     case OMPD_target_parallel:
8642     case OMPD_target_simd:
8643     case OMPD_target_parallel_for:
8644     case OMPD_target_parallel_for_simd:
8645       return nullptr;
8646     case OMPD_target_teams_distribute:
8647     case OMPD_target_teams_distribute_simd:
8648     case OMPD_target_teams_distribute_parallel_for:
8649     case OMPD_target_teams_distribute_parallel_for_simd:
8650     case OMPD_parallel:
8651     case OMPD_for:
8652     case OMPD_parallel_for:
8653     case OMPD_parallel_sections:
8654     case OMPD_for_simd:
8655     case OMPD_parallel_for_simd:
8656     case OMPD_cancel:
8657     case OMPD_cancellation_point:
8658     case OMPD_ordered:
8659     case OMPD_threadprivate:
8660     case OMPD_allocate:
8661     case OMPD_task:
8662     case OMPD_simd:
8663     case OMPD_sections:
8664     case OMPD_section:
8665     case OMPD_single:
8666     case OMPD_master:
8667     case OMPD_critical:
8668     case OMPD_taskyield:
8669     case OMPD_barrier:
8670     case OMPD_taskwait:
8671     case OMPD_taskgroup:
8672     case OMPD_atomic:
8673     case OMPD_flush:
8674     case OMPD_teams:
8675     case OMPD_target_data:
8676     case OMPD_target_exit_data:
8677     case OMPD_target_enter_data:
8678     case OMPD_distribute:
8679     case OMPD_distribute_simd:
8680     case OMPD_distribute_parallel_for:
8681     case OMPD_distribute_parallel_for_simd:
8682     case OMPD_teams_distribute:
8683     case OMPD_teams_distribute_simd:
8684     case OMPD_teams_distribute_parallel_for:
8685     case OMPD_teams_distribute_parallel_for_simd:
8686     case OMPD_target_update:
8687     case OMPD_declare_simd:
8688     case OMPD_declare_target:
8689     case OMPD_end_declare_target:
8690     case OMPD_declare_reduction:
8691     case OMPD_declare_mapper:
8692     case OMPD_taskloop:
8693     case OMPD_taskloop_simd:
8694     case OMPD_requires:
8695     case OMPD_unknown:
8696       llvm_unreachable("Unexpected directive.");
8697     }
8698   }
8699 
8700   return nullptr;
8701 }
8702 
8703 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8704     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8705     const llvm::function_ref<llvm::Value *(
8706         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8707   OpenMPDirectiveKind Kind = D.getDirectiveKind();
8708   const OMPExecutableDirective *TD = &D;
8709   // Get nested teams distribute kind directive, if any.
8710   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8711     TD = getNestedDistributeDirective(CGM.getContext(), D);
8712   if (!TD)
8713     return;
8714   const auto *LD = cast<OMPLoopDirective>(TD);
8715   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8716                                                      PrePostActionTy &) {
8717     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8718 
8719     // Emit device ID if any.
8720     llvm::Value *DeviceID;
8721     if (Device)
8722       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8723                                            CGF.Int64Ty, /*isSigned=*/true);
8724     else
8725       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8726 
8727     llvm::Value *Args[] = {DeviceID, NumIterations};
8728     CGF.EmitRuntimeCall(
8729         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8730   };
8731   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8732 }
8733 
// Emit the offloading machinery for a 'target' region: build the mapping
// arrays, launch via __tgt_target*/__tgt_target_teams*, and fall back to the
// host version of the outlined function when offloading fails or when there
// is no outlined function ID (no device targets configured).
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause requires wrapping the target call in an outer task so
  // the runtime can honor the dependencies.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values captured by the target region; several of the
  // lambdas below share this vector.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled in by TargetThenGen below before ThenGen runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // When running inside an outer task, the captured values must be
      // regenerated in the task's context before calling the host fallback.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Same as above: regenerate captures in the outer task's context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the mapping arrays (BasePointers/Pointers/Sizes/MapTypes) for every
  // capture, then hand off to ThenGen either directly or via a task.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures, captured-record fields, and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array addresses for ThenGen through the shared InputInfo
    // and MapTypesArray captured by reference above.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9006 
// Recursively scan \p S for OpenMP target execution directives and emit a
// device function for each one found, using \p ParentName for the entry-point
// name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple uniquely identifies this target region
    // entry across translation units.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each target
    // directive kind; non-target kinds are unreachable here because of the
    // isOpenMPTargetExecutionDirective() check above.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, only its captured body can contain
  // target regions; recurse into that (if present).
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9144 
9145 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9146   // If emitting code for the host, we do not process FD here. Instead we do
9147   // the normal code generation.
9148   if (!CGM.getLangOpts().OpenMPIsDevice)
9149     return false;
9150 
9151   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9152   StringRef Name = CGM.getMangledName(GD);
9153   // Try to detect target regions in the function.
9154   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9155     scanForTargetRegionsFunctions(FD->getBody(), Name);
9156 
9157   // Do not to emit function if it is not marked as declare target.
9158   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9159          AlreadyEmittedTargetFunctions.count(Name) == 0;
9160 }
9161 
9162 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9163   if (!CGM.getLangOpts().OpenMPIsDevice)
9164     return false;
9165 
9166   // Check if there are Ctors/Dtors in this declaration and look for target
9167   // regions in it. We use the complete variant to produce the kernel name
9168   // mangling.
9169   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9170   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9171     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9172       StringRef ParentName =
9173           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9174       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9175     }
9176     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9177       StringRef ParentName =
9178           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9179       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9180     }
9181   }
9182 
9183   // Do not to emit variable if it is not marked as declare target.
9184   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9185       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9186           cast<VarDecl>(GD.getDecl()));
9187   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9188       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9189        HasRequiresUnifiedSharedMemory)) {
9190     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9191     return true;
9192   }
9193   return false;
9194 }
9195 
9196 llvm::Constant *
9197 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9198                                                 const VarDecl *VD) {
9199   assert(VD->getType().isConstant(CGM.getContext()) &&
9200          "Expected constant variable.");
9201   StringRef VarName;
9202   llvm::Constant *Addr;
9203   llvm::GlobalValue::LinkageTypes Linkage;
9204   QualType Ty = VD->getType();
9205   SmallString<128> Buffer;
9206   {
9207     unsigned DeviceID;
9208     unsigned FileID;
9209     unsigned Line;
9210     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9211                              FileID, Line);
9212     llvm::raw_svector_ostream OS(Buffer);
9213     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9214        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9215     VarName = OS.str();
9216   }
9217   Linkage = llvm::GlobalValue::InternalLinkage;
9218   Addr =
9219       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9220                                   getDefaultFirstprivateAddressSpace());
9221   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9222   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9223   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9224   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9225       VarName, Addr, VarSize,
9226       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9227   return Addr;
9228 }
9229 
// Register \p VD (whose address is \p Addr) in the offload entries table
// according to its declare-target mapping kind, or record it as a non-target
// variable when it carries no declare-target attribute.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  // 'declare target to' without unified shared memory: the variable itself is
  // the offload entry, recorded with its real size and linkage.
  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size zero marks an external entry.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal constant "<name>_ref" global pointing at the
      // variable and mark it compiler-used so the variable is kept alive.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'declare target link' (or 'to' with unified shared memory): the entry
    // is a pointer-sized reference rather than the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the incoming Addr already names the entry; no host
      // address is registered.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
9297 
9298 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9299   if (isa<FunctionDecl>(GD.getDecl()) ||
9300       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9301     return emitTargetFunctions(GD);
9302 
9303   return emitTargetGlobalVariable(GD);
9304 }
9305 
9306 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9307   for (const VarDecl *VD : DeferredGlobalVariables) {
9308     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9309         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9310     if (!Res)
9311       continue;
9312     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9313         !HasRequiresUnifiedSharedMemory) {
9314       CGM.EmitGlobal(VD);
9315     } else {
9316       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9317               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9318                HasRequiresUnifiedSharedMemory)) &&
9319              "Expected link clause or to clause with unified memory.");
9320       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9321     }
9322   }
9323 }
9324 
/// Default implementation performs no adjustment; it only validates that
/// \p D is a target-based directive. Target-specific runtimes may override
/// this to fix up lambda captures — TODO confirm against subclasses.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9330 
9331 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9332     const OMPRequiresDecl *D) {
9333   for (const OMPClause *Clause : D->clauselists()) {
9334     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9335       HasRequiresUnifiedSharedMemory = true;
9336       break;
9337     }
9338   }
9339 }
9340 
9341 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9342                                                        LangAS &AS) {
9343   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9344     return false;
9345   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9346   switch(A->getAllocatorType()) {
9347   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9348   // Not supported, fallback to the default mem space.
9349   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9350   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9351   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9352   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9353   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9354   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9355   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9356     AS = LangAS::Default;
9357     return true;
9358   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9359     llvm_unreachable("Expected predefined allocator for the variables with the "
9360                      "static storage.");
9361   }
9362   return false;
9363 }
9364 
/// Returns whether a 'requires unified_shared_memory' clause has been seen
/// in this translation unit (recorded by checkArchForUnifiedAddressing).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9368 
/// Temporarily disables automatic "mark as global target" bookkeeping:
/// saves ShouldMarkAsGlobal and clears it for the lifetime of this object.
/// Has no effect on the host (non-device) compilation pass.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Remember the previous value so the destructor can restore it.
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
9377 
/// Restores the ShouldMarkAsGlobal flag saved by the constructor (device
/// compilation only, mirroring the constructor's condition).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
9382 
/// Tracks whether the function \p GD was already emitted for the device.
/// Returns true on the host pass, when marking is disabled, or when the
/// function is already emitted; returns false the first time the function
/// is seen. NOTE(review): the precise caller contract of the return value
/// is inferred from the insert()/count() bookkeeping below — confirm
/// against the call sites.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked declare target; it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      // Not tracked yet: report "already emitted" only when the module holds
      // a full definition (not just a declaration) under this name.
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First insertion (second == true) means the function was not emitted yet,
  // so return false; later calls find the name and return true.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
9402 
/// Creates the ".omp_offloading.requires_reg" function that forwards the
/// accumulated 'requires' flags to __tgt_register_requires. Returns nullptr
/// when no registration is needed: no target triples, simd-only mode,
/// device pass, or no offload entries / target regions in this TU.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scoped so the CodeGenFunction is torn down before returning.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit: __tgt_register_requires(<flags>).
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9443 
/// Emits the per-TU offloading metadata/entries and returns the function
/// that registers the offloading binary descriptor with the runtime.
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}
9454 
9455 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9456                                     const OMPExecutableDirective &D,
9457                                     SourceLocation Loc,
9458                                     llvm::Function *OutlinedFn,
9459                                     ArrayRef<llvm::Value *> CapturedVars) {
9460   if (!CGF.HaveInsertPoint())
9461     return;
9462 
9463   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9464   CodeGenFunction::RunCleanupsScope Scope(CGF);
9465 
9466   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9467   llvm::Value *Args[] = {
9468       RTLoc,
9469       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9470       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9471   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9472   RealArgs.append(std::begin(Args), std::end(Args));
9473   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9474 
9475   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9476   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9477 }
9478 
9479 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9480                                          const Expr *NumTeams,
9481                                          const Expr *ThreadLimit,
9482                                          SourceLocation Loc) {
9483   if (!CGF.HaveInsertPoint())
9484     return;
9485 
9486   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9487 
9488   llvm::Value *NumTeamsVal =
9489       NumTeams
9490           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9491                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9492           : CGF.Builder.getInt32(0);
9493 
9494   llvm::Value *ThreadLimitVal =
9495       ThreadLimit
9496           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9497                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9498           : CGF.Builder.getInt32(0);
9499 
9500   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9501   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9502                                      ThreadLimitVal};
9503   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9504                       PushNumTeamsArgs);
9505 }
9506 
/// Emits the 'target data' region: __tgt_target_data_begin / region body /
/// __tgt_target_data_end, honoring the 'if' and 'device' clauses. When the
/// region privatizes device pointers, the body is emitted twice (with and
/// without privatization) inside the then/else branches; otherwise it is
/// emitted once between the begin/end runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; default to OMP_DEVICEID_UNDEF when there is no
    // 'device' clause.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // Info was filled by BeginThenGen; the closing call reuses its arrays.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any (must match the value used in the begin call).
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (conditionally if there is an 'if' clause).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (again conditionally on the 'if' clause).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
9633 
/// Emits the runtime call for a standalone data directive ('target enter
/// data', 'target exit data', or 'target update'), honoring the 'if',
/// 'device', 'nowait', and 'depend' clauses. With 'depend' the call is
/// wrapped in a target task; otherwise it is emitted inline.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled by TargetThenGen below and consumed (by reference) by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; default to OMP_DEVICEID_UNDEF when there is no
    // 'device' clause.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All other directive kinds are rejected by the assert above; the
    // exhaustive list keeps -Wswitch useful when new kinds are added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays, fills InputInfo/MapTypesArray, and then
  // emits ThenGen either as a target task (with 'depend') or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Honor the 'if' clause: the runtime call is skipped when it evaluates to
  // false (empty else branch).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
9786 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector (varying).
    ParamKindTy Kind = Vector;
    /// Stride for linear parameters, or the argument value — see the
    /// mangling code that prints it after 'l'/'s'.
    llvm::APSInt StrideOrArg;
    /// Alignment to encode with the 'a' token in the mangled name, if any.
    llvm::APSInt Alignment;
  };
} // namespace
9797 
9798 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9799                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9800   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9801   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9802   // of that clause. The VLEN value must be power of 2.
9803   // In other case the notion of the function`s "characteristic data type" (CDT)
9804   // is used to compute the vector length.
9805   // CDT is defined in the following order:
9806   //   a) For non-void function, the CDT is the return type.
9807   //   b) If the function has any non-uniform, non-linear parameters, then the
9808   //   CDT is the type of the first such parameter.
9809   //   c) If the CDT determined by a) or b) above is struct, union, or class
9810   //   type which is pass-by-value (except for the type that maps to the
9811   //   built-in complex data type), the characteristic data type is int.
9812   //   d) If none of the above three cases is applicable, the CDT is int.
9813   // The VLEN is then determined based on the CDT and the size of vector
9814   // register of that ISA for which current vector version is generated. The
9815   // VLEN is computed using the formula below:
9816   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
9817   // where vector register size specified in section 3.2.1 Registers and the
9818   // Stack Frame of original AMD64 ABI document.
9819   QualType RetType = FD->getReturnType();
9820   if (RetType.isNull())
9821     return 0;
9822   ASTContext &C = FD->getASTContext();
9823   QualType CDT;
9824   if (!RetType.isNull() && !RetType->isVoidType()) {
9825     CDT = RetType;
9826   } else {
9827     unsigned Offset = 0;
9828     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9829       if (ParamAttrs[Offset].Kind == Vector)
9830         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9831       ++Offset;
9832     }
9833     if (CDT.isNull()) {
9834       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9835         if (ParamAttrs[I + Offset].Kind == Vector) {
9836           CDT = FD->getParamDecl(I)->getType();
9837           break;
9838         }
9839       }
9840     }
9841   }
9842   if (CDT.isNull())
9843     CDT = C.IntTy;
9844   CDT = CDT->getCanonicalTypeUnqualified();
9845   if (CDT->isRecordType() || CDT->isUnionType())
9846     CDT = C.IntTy;
9847   return C.getTypeSize(CDT);
9848 }
9849 
/// Adds the x86 'declare simd' vector-variant mangled names as function
/// attributes on \p Fn, one per (mask, ISA) combination:
///   _ZGV<isa><mask><vlen><param tokens>_<scalar name>.
/// \p VLENVal of 0 means no 'simdlen' clause: the VLEN is derived from the
/// vector register size and the characteristic data type size.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  // ISA letter and vector register width for each x86 vector extension.
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  // 'N' = not-in-branch (unmasked), 'M' = in-branch (masked); undefined
  // branch state emits both variants.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No simdlen: VLEN = vector register size / CDT size.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // One token per parameter: s<stride>, l[<step>], u, or v, optionally
      // followed by a<alignment>.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (!!ParamAttr.StrideOrArg)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
9923 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9929 
9930 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9931 ///
9932 /// TODO: Need to implement the behavior for reference marked with a
9933 /// var or no linear modifiers (1.b in the section). For this, we
9934 /// need to extend ParamKindTy to support the linear modifiers.
9935 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9936   QT = QT.getCanonicalType();
9937 
9938   if (QT->isVoidType())
9939     return false;
9940 
9941   if (Kind == ParamKindTy::Uniform)
9942     return false;
9943 
9944   if (Kind == ParamKindTy::Linear)
9945     return false;
9946 
9947   // TODO: Handle linear references with modifiers
9948 
9949   if (Kind == ParamKindTy::LinearWithVarStride)
9950     return false;
9951 
9952   return true;
9953 }
9954 
9955 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9956 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9957   QT = QT.getCanonicalType();
9958   unsigned Size = C.getTypeSize(QT);
9959 
9960   // Only scalars and complex within 16 bytes wide set PVB to true.
9961   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9962     return false;
9963 
9964   if (QT->isFloatingType())
9965     return true;
9966 
9967   if (QT->isIntegerType())
9968     return true;
9969 
9970   if (QT->isPointerType())
9971     return true;
9972 
9973   // TODO: Add support for complex types (section 3.1.2, item 2).
9974 
9975   return false;
9976 }
9977 
/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  // Pointer that maps to vector: use the pointee's size when the pointee is
  // itself pass-by-value.
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  // Pass-by-value types use their own size.
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  // Everything else contributes a pointer-sized lane.
  return C.getTypeSize(C.getUIntPtrType());
}
9992 
9993 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9994 // signature of the scalar function, as defined in 3.2.2 of the
9995 // AAVFABI.
9996 static std::tuple<unsigned, unsigned, bool>
9997 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9998   QualType RetType = FD->getReturnType().getCanonicalType();
9999 
10000   ASTContext &C = FD->getASTContext();
10001 
10002   bool OutputBecomesInput = false;
10003 
10004   llvm::SmallVector<unsigned, 8> Sizes;
10005   if (!RetType->isVoidType()) {
10006     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10007     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10008       OutputBecomesInput = true;
10009   }
10010   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10011     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10012     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10013   }
10014 
10015   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10016   // The LS of a function parameter / return value can only be a power
10017   // of 2, starting from 8 bits, up to 128.
10018   assert(std::all_of(Sizes.begin(), Sizes.end(),
10019                      [](unsigned Size) {
10020                        return Size == 8 || Size == 16 || Size == 32 ||
10021                               Size == 64 || Size == 128;
10022                      }) &&
10023          "Invalid size");
10024 
10025   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10026                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10027                          OutputBecomesInput);
10028 }
10029 
10030 /// Mangle the parameter part of the vector function name according to
10031 /// their OpenMP classification. The mangling function is defined in
10032 /// section 3.5 of the AAVFABI.
10033 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10034   SmallString<256> Buffer;
10035   llvm::raw_svector_ostream Out(Buffer);
10036   for (const auto &ParamAttr : ParamAttrs) {
10037     switch (ParamAttr.Kind) {
10038     case LinearWithVarStride:
10039       Out << "ls" << ParamAttr.StrideOrArg;
10040       break;
10041     case Linear:
10042       Out << 'l';
10043       // Don't print the step value if it is not present or if it is
10044       // equal to 1.
10045       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10046         Out << ParamAttr.StrideOrArg;
10047       break;
10048     case Uniform:
10049       Out << 'u';
10050       break;
10051     case Vector:
10052       Out << 'v';
10053       break;
10054     }
10055 
10056     if (!!ParamAttr.Alignment)
10057       Out << 'a' << ParamAttr.Alignment;
10058   }
10059 
10060   return Out.str();
10061 }
10062 
10063 // Function used to add the attribute. The parameter `VLEN` is
10064 // templated to allow the use of "x" when targeting scalable functions
10065 // for SVE.
10066 template <typename T>
10067 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10068                                  char ISA, StringRef ParSeq,
10069                                  StringRef MangledName, bool OutputBecomesInput,
10070                                  llvm::Function *Fn) {
10071   SmallString<256> Buffer;
10072   llvm::raw_svector_ostream Out(Buffer);
10073   Out << Prefix << ISA << LMask << VLEN;
10074   if (OutputBecomesInput)
10075     Out << "v";
10076   Out << ParSeq << "_" << MangledName;
10077   Fn->addFnAttr(Out.str());
10078 }
10079 
10080 // Helper function to generate the Advanced SIMD names depending on
10081 // the value of the NDS when simdlen is not present.
10082 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10083                                       StringRef Prefix, char ISA,
10084                                       StringRef ParSeq, StringRef MangledName,
10085                                       bool OutputBecomesInput,
10086                                       llvm::Function *Fn) {
10087   switch (NDS) {
10088   case 8:
10089     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10090                          OutputBecomesInput, Fn);
10091     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10092                          OutputBecomesInput, Fn);
10093     break;
10094   case 16:
10095     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10096                          OutputBecomesInput, Fn);
10097     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10098                          OutputBecomesInput, Fn);
10099     break;
10100   case 32:
10101     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10102                          OutputBecomesInput, Fn);
10103     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10104                          OutputBecomesInput, Fn);
10105     break;
10106   case 64:
10107   case 128:
10108     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10109                          OutputBecomesInput, Fn);
10110     break;
10111   default:
10112     llvm_unreachable("Scalar type is too wide.");
10113   }
10114 }
10115 
10116 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// \param UserVLEN value of the `simdlen` clause, or 0 if absent.
/// \param State    branch state from the `[not]inbranch` clause.
/// \param ISA      's' for SVE, 'n' for Advanced SIMD (NEON).
/// \param SLoc     location used for user-facing diagnostics.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total vector width in [128, 2048] bits, in steps of 128.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No clause: emit both the unmasked ("N") and masked ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: "x" denotes a scalable vector length.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10224 
10225 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10226                                               llvm::Function *Fn) {
10227   ASTContext &C = CGM.getContext();
10228   FD = FD->getMostRecentDecl();
10229   // Map params to their positions in function decl.
10230   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10231   if (isa<CXXMethodDecl>(FD))
10232     ParamPositions.try_emplace(FD, 0);
10233   unsigned ParamPos = ParamPositions.size();
10234   for (const ParmVarDecl *P : FD->parameters()) {
10235     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10236     ++ParamPos;
10237   }
10238   while (FD) {
10239     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10240       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10241       // Mark uniform parameters.
10242       for (const Expr *E : Attr->uniforms()) {
10243         E = E->IgnoreParenImpCasts();
10244         unsigned Pos;
10245         if (isa<CXXThisExpr>(E)) {
10246           Pos = ParamPositions[FD];
10247         } else {
10248           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10249                                 ->getCanonicalDecl();
10250           Pos = ParamPositions[PVD];
10251         }
10252         ParamAttrs[Pos].Kind = Uniform;
10253       }
10254       // Get alignment info.
10255       auto NI = Attr->alignments_begin();
10256       for (const Expr *E : Attr->aligneds()) {
10257         E = E->IgnoreParenImpCasts();
10258         unsigned Pos;
10259         QualType ParmTy;
10260         if (isa<CXXThisExpr>(E)) {
10261           Pos = ParamPositions[FD];
10262           ParmTy = E->getType();
10263         } else {
10264           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10265                                 ->getCanonicalDecl();
10266           Pos = ParamPositions[PVD];
10267           ParmTy = PVD->getType();
10268         }
10269         ParamAttrs[Pos].Alignment =
10270             (*NI)
10271                 ? (*NI)->EvaluateKnownConstInt(C)
10272                 : llvm::APSInt::getUnsigned(
10273                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10274                           .getQuantity());
10275         ++NI;
10276       }
10277       // Mark linear parameters.
10278       auto SI = Attr->steps_begin();
10279       auto MI = Attr->modifiers_begin();
10280       for (const Expr *E : Attr->linears()) {
10281         E = E->IgnoreParenImpCasts();
10282         unsigned Pos;
10283         if (isa<CXXThisExpr>(E)) {
10284           Pos = ParamPositions[FD];
10285         } else {
10286           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10287                                 ->getCanonicalDecl();
10288           Pos = ParamPositions[PVD];
10289         }
10290         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10291         ParamAttr.Kind = Linear;
10292         if (*SI) {
10293           Expr::EvalResult Result;
10294           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10295             if (const auto *DRE =
10296                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10297               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10298                 ParamAttr.Kind = LinearWithVarStride;
10299                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10300                     ParamPositions[StridePVD->getCanonicalDecl()]);
10301               }
10302             }
10303           } else {
10304             ParamAttr.StrideOrArg = Result.Val.getInt();
10305           }
10306         }
10307         ++SI;
10308         ++MI;
10309       }
10310       llvm::APSInt VLENVal;
10311       SourceLocation ExprLoc;
10312       const Expr *VLENExpr = Attr->getSimdlen();
10313       if (VLENExpr) {
10314         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10315         ExprLoc = VLENExpr->getExprLoc();
10316       }
10317       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10318       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10319           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10320         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10321       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10322         unsigned VLEN = VLENVal.getExtValue();
10323         StringRef MangledName = Fn->getName();
10324         if (CGM.getTarget().hasFeature("sve"))
10325           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10326                                          MangledName, 's', 128, Fn, ExprLoc);
10327         if (CGM.getTarget().hasFeature("neon"))
10328           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10329                                          MangledName, 'n', 128, Fn, ExprLoc);
10330       }
10331     }
10332     FD = FD->getPreviousDecl();
10333   }
10334 }
10335 
10336 namespace {
10337 /// Cleanup action for doacross support.
10338 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10339 public:
10340   static const int DoacrossFinArgs = 2;
10341 
10342 private:
10343   llvm::FunctionCallee RTLFn;
10344   llvm::Value *Args[DoacrossFinArgs];
10345 
10346 public:
10347   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10348                     ArrayRef<llvm::Value *> CallArgs)
10349       : RTLFn(RTLFn) {
10350     assert(CallArgs.size() == DoacrossFinArgs);
10351     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10352   }
10353   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10354     if (!CGF.HaveInsertPoint())
10355       return;
10356     CGF.EmitRuntimeCall(RTLFn, Args);
10357   }
10358 };
10359 } // namespace
10360 
/// Emit the __kmpc_doacross_init call for an ordered(n) loop nest and
/// register a cleanup that emits the matching __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once per module) the kmp_dim record; cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per dimension of the doacross loop nest.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero-init: the 'lo' (lower bound) fields stay 0 and are not written below.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices must match the declaration order in kmp_dim above.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Widen/convert the iteration count to kmp_int64.
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini on scope exit (normal and EH paths).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10432 
10433 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10434                                           const OMPDependClause *C) {
10435   QualType Int64Ty =
10436       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10437   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10438   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10439       Int64Ty, Size, ArrayType::Normal, 0);
10440   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10441   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10442     const Expr *CounterVal = C->getLoopData(I);
10443     assert(CounterVal);
10444     llvm::Value *CntVal = CGF.EmitScalarConversion(
10445         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10446         CounterVal->getExprLoc());
10447     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10448                           /*Volatile=*/false, Int64Ty);
10449   }
10450   llvm::Value *Args[] = {
10451       emitUpdateLocation(CGF, C->getBeginLoc()),
10452       getThreadID(CGF, C->getBeginLoc()),
10453       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10454   llvm::FunctionCallee RTLFn;
10455   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10456     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10457   } else {
10458     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10459     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10460   }
10461   CGF.EmitRuntimeCall(RTLFn, Args);
10462 }
10463 
10464 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10465                                llvm::FunctionCallee Callee,
10466                                ArrayRef<llvm::Value *> Args) const {
10467   assert(Loc.isValid() && "Outlined function call location must be valid.");
10468   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10469 
10470   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10471     if (Fn->doesNotThrow()) {
10472       CGF.EmitNounwindRuntimeCall(Fn, Args);
10473       return;
10474     }
10475   }
10476   CGF.EmitRuntimeCall(Callee, Args);
10477 }
10478 
/// Emit a call to an outlined OpenMP region function. The base
/// implementation simply forwards to emitCall; runtimes with special
/// calling conventions can override this hook.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10484 
10485 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10486   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10487     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10488       HasEmittedDeclareTargetRegion = true;
10489 }
10490 
/// Return the address to use for \p NativeParam inside an outlined region.
/// Host codegen uses the native parameter directly; TargetParam is ignored
/// here (NOTE(review): device runtimes appear to be the consumers of
/// TargetParam via overrides — confirm against the subclasses).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10496 
10497 namespace {
10498 /// Cleanup action for allocate support.
10499 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10500 public:
10501   static const int CleanupArgs = 3;
10502 
10503 private:
10504   llvm::FunctionCallee RTLFn;
10505   llvm::Value *Args[CleanupArgs];
10506 
10507 public:
10508   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10509                        ArrayRef<llvm::Value *> CallArgs)
10510       : RTLFn(RTLFn) {
10511     assert(CallArgs.size() == CleanupArgs &&
10512            "Size of arguments does not match.");
10513     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10514   }
10515   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10516     if (!CGF.HaveInsertPoint())
10517       return;
10518     CGF.EmitRuntimeCall(RTLFn, Args);
10519   }
10520 };
10521 } // namespace
10522 
/// If \p VD carries an 'omp allocate' attribute with a non-default
/// allocator, allocate its storage through __kmpc_alloc and schedule a
/// matching __kmpc_free cleanup; otherwise return an invalid Address so
/// the caller falls back to ordinary local allocation.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round the size up to the alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Schedule __kmpc_free for this allocation on scope exit (normal and EH).
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
10576 
10577 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
10578     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10579     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
10580   llvm_unreachable("Not supported in SIMD-only mode");
10581 }
10582 
10583 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
10584     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10585     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
10586   llvm_unreachable("Not supported in SIMD-only mode");
10587 }
10588 
10589 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
10590     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10591     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
10592     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
10593     bool Tied, unsigned &NumberOfParts) {
10594   llvm_unreachable("Not supported in SIMD-only mode");
10595 }
10596 
10597 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
10598                                            SourceLocation Loc,
10599                                            llvm::Function *OutlinedFn,
10600                                            ArrayRef<llvm::Value *> CapturedVars,
10601                                            const Expr *IfCond) {
10602   llvm_unreachable("Not supported in SIMD-only mode");
10603 }
10604 
10605 void CGOpenMPSIMDRuntime::emitCriticalRegion(
10606     CodeGenFunction &CGF, StringRef CriticalName,
10607     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
10608     const Expr *Hint) {
10609   llvm_unreachable("Not supported in SIMD-only mode");
10610 }
10611 
10612 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
10613                                            const RegionCodeGenTy &MasterOpGen,
10614                                            SourceLocation Loc) {
10615   llvm_unreachable("Not supported in SIMD-only mode");
10616 }
10617 
10618 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
10619                                             SourceLocation Loc) {
10620   llvm_unreachable("Not supported in SIMD-only mode");
10621 }
10622 
10623 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
10624     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
10625     SourceLocation Loc) {
10626   llvm_unreachable("Not supported in SIMD-only mode");
10627 }
10628 
10629 void CGOpenMPSIMDRuntime::emitSingleRegion(
10630     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
10631     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
10632     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
10633     ArrayRef<const Expr *> AssignmentOps) {
10634   llvm_unreachable("Not supported in SIMD-only mode");
10635 }
10636 
10637 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
10638                                             const RegionCodeGenTy &OrderedOpGen,
10639                                             SourceLocation Loc,
10640                                             bool IsThreads) {
10641   llvm_unreachable("Not supported in SIMD-only mode");
10642 }
10643 
10644 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
10645                                           SourceLocation Loc,
10646                                           OpenMPDirectiveKind Kind,
10647                                           bool EmitChecks,
10648                                           bool ForceSimpleCall) {
10649   llvm_unreachable("Not supported in SIMD-only mode");
10650 }
10651 
10652 void CGOpenMPSIMDRuntime::emitForDispatchInit(
10653     CodeGenFunction &CGF, SourceLocation Loc,
10654     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
10655     bool Ordered, const DispatchRTInput &DispatchValues) {
10656   llvm_unreachable("Not supported in SIMD-only mode");
10657 }
10658 
10659 void CGOpenMPSIMDRuntime::emitForStaticInit(
10660     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
10661     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
10662   llvm_unreachable("Not supported in SIMD-only mode");
10663 }
10664 
10665 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
10666     CodeGenFunction &CGF, SourceLocation Loc,
10667     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
10668   llvm_unreachable("Not supported in SIMD-only mode");
10669 }
10670 
10671 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
10672                                                      SourceLocation Loc,
10673                                                      unsigned IVSize,
10674                                                      bool IVSigned) {
10675   llvm_unreachable("Not supported in SIMD-only mode");
10676 }
10677 
10678 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
10679                                               SourceLocation Loc,
10680                                               OpenMPDirectiveKind DKind) {
10681   llvm_unreachable("Not supported in SIMD-only mode");
10682 }
10683 
10684 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
10685                                               SourceLocation Loc,
10686                                               unsigned IVSize, bool IVSigned,
10687                                               Address IL, Address LB,
10688                                               Address UB, Address ST) {
10689   llvm_unreachable("Not supported in SIMD-only mode");
10690 }
10691 
10692 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
10693                                                llvm::Value *NumThreads,
10694                                                SourceLocation Loc) {
10695   llvm_unreachable("Not supported in SIMD-only mode");
10696 }
10697 
10698 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
10699                                              OpenMPProcBindClauseKind ProcBind,
10700                                              SourceLocation Loc) {
10701   llvm_unreachable("Not supported in SIMD-only mode");
10702 }
10703 
10704 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
10705                                                     const VarDecl *VD,
10706                                                     Address VDAddr,
10707                                                     SourceLocation Loc) {
10708   llvm_unreachable("Not supported in SIMD-only mode");
10709 }
10710 
// Defining/initializing a threadprivate variable is not supported in SIMD-only
// mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10716 
// Artificial (compiler-generated) threadprivate storage is not supported in
// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10721 
// The 'flush' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10727 
// The 'task' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10736 
// The 'taskloop' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10743 
// In SIMD-only mode only "simple" reductions may occur (asserted below);
// forward to the shared base-class implementation for those.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
10752 
// Task reduction initialization is not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10758 
// Task reduction fixups are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10765 
// Looking up a task reduction item is not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10772 
// The 'taskwait' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10777 
// The 'cancellation point' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10783 
// The 'cancel' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10789 
// Outlining a 'target' region is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10796 
// Invoking a 'target' region is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10805 
// Emitting device functions for a 'target' region is not supported in
// SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10809 
// Emitting device global variables is not supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10813 
// There is no offloading in SIMD-only mode: returning false indicates this
// runtime never takes over emission of a global, so regular codegen handles it.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
10817 
// No offload-entry registration function is needed in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
10821 
// The 'teams' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10829 
// The 'num_teams'/'thread_limit' clauses are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10836 
// The 'target data' directive is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10842 
// Standalone target-data directives ('target enter/exit data', 'target
// update') are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10848 
// Doacross (cross-iteration 'ordered depend') initialization is not supported
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10854 
// Doacross 'ordered' dependence codegen is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10859 
// Translating an outlined-function parameter to its native form is not
// supported in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10865 
// Mapping a native parameter to its target-side address is not supported in
// SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10872