1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "clang/CodeGen/ConstantInitBuilder.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/Basic/BitmaskEnum.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/Bitcode/BitcodeReader.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <cassert>
32 
33 using namespace clang;
34 using namespace CodeGen;
35 
36 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Region that captures an explicit statement \a CS (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Region without a captured statement (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next task-switching point for untied tasks; no-op by default,
  /// overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Get the kind of this region (parallel, task, inlined or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Get the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may contain a cancellation point (set by the
  /// creator of the region).
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region.
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that created this region.
  OpenMPDirectiveKind Kind;
  /// True if the region may contain a cancellation point.
  bool HasCancel;
};
98 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function for this region.
  StringRef HelperName;
};
131 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the task-switching machinery required for
  /// 'untied' tasks: a switch over the task part id that allows re-entering
  /// the outlined task function at the point where it last suspended.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note the constructor takes \p Tied).
    bool Untied;
    /// Variable holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence run at each task-switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; one case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch on the loaded part id. Unknown values go to the 'done'
        // block, which just returns from the outlined function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the id of the next part, run
    /// \p UntiedCodeGen, return from the task, and register the continuation
    /// block as a new case of the part-id switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the next part id so a re-invocation resumes there.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the untied-task action (no-op for tied tasks).
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
220 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing (outer) region info,
/// if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE(review): unlike the other delegating methods above, this checks
    // the raw outer CGCapturedStmtInfo (getOldCSI()) rather than the
    // OpenMP-specific OuterRegionInfo — confirm this asymmetry is intentional.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Get the captured statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// \p OldCSI downcast to an OpenMP region info, or null if the outer
  /// captured statement info is absent or not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
303 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region helper.
  StringRef HelperName;
};
332 
/// Placeholder codegen callback for regions that capture only expressions and
/// therefore must never emit a body; reaching it is a programming error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization below.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the captured variable so EmitLValue can compute
      // its address in the current codegen context.
      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable. Delegates to the
  /// enclosing inlined region; returns null when nothing is found there.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
395 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture state of \p CGF, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block (closure) emission state of \p CGF, restored in the
  /// destructor.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Temporarily clear lambda/block capture info for the duration of the
    // region; the saved copies are put back by the destructor.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
432 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /// Enable bitwise operators (|, &, ~) on this enum.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
461 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
502 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule (alias for static).
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
534 
/// Identifiers of the OpenMP/offloading runtime library entry points used by
/// this code generator; each enumerator documents the C signature it maps to.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
718 
719 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
720 /// region.
721 class CleanupTy final : public EHScopeStack::Cleanup {
722   PrePostActionTy *Action;
723 
724 public:
725   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
726   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
727     if (!CGF.HaveInsertPoint())
728       return;
729     Action->Exit(CGF);
730   }
731 };
732 
733 } // anonymous namespace
734 
735 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
736   CodeGenFunction::RunCleanupsScope Scope(CGF);
737   if (PrePostAction) {
738     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
739     Callback(CodeGen, CGF, *PrePostAction);
740   } else {
741     PrePostActionTy Action;
742     Callback(CodeGen, CGF, Action);
743   }
744 }
745 
746 /// Check if the combiner is a call to UDR combiner and if it is so return the
747 /// UDR decl used for reduction.
748 static const OMPDeclareReductionDecl *
749 getReductionInit(const Expr *ReductionOp) {
750   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
751     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
752       if (const auto *DRE =
753               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
754         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
755           return DRD;
756   return nullptr;
757 }
758 
/// Initialize \p Private using the declare-reduction initializer of \p DRD,
/// or with a zero value when the UDR declares no initializer.
/// \param DRD User-defined reduction declaration driving the initialization.
/// \param InitOp Initializer call expression from the reduction clause; its
///        two arguments reference the priv/orig variables.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) reduction item.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Emit a call to the UDR's initializer function (second element of the
    // combiner/initializer pair).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The call's two arguments are '&priv' / '&orig'; strip the address-of
    // to reach the referenced variables.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap those variables onto the actual private/original addresses while
    // emitting the initializer call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the emitted initializer function and emit
    // the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a null constant of the item's type in a
    // private global and copy it into the private copy.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant according to the type's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Wrap the loaded value in an opaque expression and store it to the
    // private address.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
810 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, \p Init is a UDR combiner call
///        and initialization goes through emitInitWithReductionInitializer.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration used for initialization, if any.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): casting DestAddr to its own element type looks like a
  // no-op — confirm whether ElementTy's memory type was intended here.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // When a UDR is involved, walk the source array in lockstep with the
  // destination via a second PHI.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Per-element cleanups (e.g. temporaries of the initializer) must not
    // outlive the iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says 'dest.element' but this advances
    // the *source* PHI; name is cosmetic only.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
899 
/// Emit the lvalue of the shared (original) reduction item \p E by
/// delegating to CodeGenFunction's generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
903 
904 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
905                                             const Expr *E) {
906   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
907     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
908   return LValue();
909 }
910 
911 void ReductionCodeGen::emitAggregateInitialization(
912     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
913     const OMPDeclareReductionDecl *DRD) {
914   // Emit VarDecl with copy init for arrays.
915   // Get the address of the original variable captured in current
916   // captured region.
917   const auto *PrivateVD =
918       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
919   bool EmitDeclareReductionInit =
920       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
921   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
922                        EmitDeclareReductionInit,
923                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
924                                                 : PrivateVD->getInit(),
925                        DRD, SharedLVal.getAddress());
926 }
927 
928 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
929                                    ArrayRef<const Expr *> Privates,
930                                    ArrayRef<const Expr *> ReductionOps) {
931   ClausesData.reserve(Shareds.size());
932   SharedAddresses.reserve(Shareds.size());
933   Sizes.reserve(Shareds.size());
934   BaseDecls.reserve(Shareds.size());
935   auto IPriv = Privates.begin();
936   auto IRed = ReductionOps.begin();
937   for (const Expr *Ref : Shareds) {
938     ClausesData.emplace_back(Ref, *IPriv, *IRed);
939     std::advance(IPriv, 1);
940     std::advance(IRed, 1);
941   }
942 }
943 
944 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
945   assert(SharedAddresses.size() == N &&
946          "Number of generated lvalues must be exactly N.");
947   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
948   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
949   SharedAddresses.emplace_back(First, Second);
950 }
951 
/// Compute and record the size of reduction item \p N, both in chars and in
/// elements. For non-VLA types only the char size is recorded; for variably
/// modified types the computed element count is additionally bound to the
/// VLA's size expression so EmitVariablyModifiedType can materialize it.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: char size is known statically from the shared type;
    // no element count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, derived from the recorded lvalue pair;
    // char size follows by multiplying with sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole object: take the char size and divide by sizeof(element) to get
    // the element count.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count for the
  // duration of EmitVariablyModifiedType.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
989 
/// Re-emit the variably modified type of reduction item \p N using a
/// caller-provided element count \p Size (e.g. one recovered at a later
/// codegen point). For non-VLA items \p Size must be null and nothing is
/// emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1008 
/// Emit initialization of the private copy of reduction item \p N.
/// \param PrivateAddr Address of the private copy (recast below to the
///        private type's memory representation).
/// \param SharedLVal LValue of the original shared item.
/// \param DefaultInit Callback given the first chance to initialize; a true
///        return suppresses the fallback on the private decl's initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Non-null if the reduction op is a user-defined-reduction call.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  // Recast the shared lvalue too, preserving the original's base and TBAA
  // information.
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer (or no initializer of its own).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit declined: emit the private declaration's own non-trivial
    // initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1039 
1040 bool ReductionCodeGen::needCleanups(unsigned N) {
1041   const auto *PrivateVD =
1042       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1043   QualType PrivateType = PrivateVD->getType();
1044   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1045   return DTorKind != QualType::DK_none;
1046 }
1047 
1048 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1049                                     Address PrivateAddr) {
1050   const auto *PrivateVD =
1051       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1052   QualType PrivateType = PrivateVD->getType();
1053   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1054   if (needCleanups(N)) {
1055     PrivateAddr = CGF.Builder.CreateElementBitCast(
1056         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1057     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1058   }
1059 }
1060 
/// Starting from \p BaseLV of type \p BaseTy, load through each pointer or
/// reference level until the pointee type matches \p ElTy, then return the
/// resulting lvalue recast to \p ElTy's memory representation. Base and TBAA
/// info of the last loaded lvalue are preserved.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel one indirection per iteration until the element type is reached.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference level: load through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1080 
/// Rebuild the indirection chain of \p BaseTy around the raw pointer
/// \p Addr: for each pointer/reference level down to \p ElTy a temporary is
/// allocated and linked to the previous one, \p Addr is stored into the
/// innermost temporary, and the outermost temporary is returned. With no
/// indirection levels, \p Addr is returned directly at \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();       // innermost temp so far
  Address TopTmp = Address::invalid();    // previous temp to link into
  Address MostTopTmp = Address::invalid();// outermost temp (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temp under the previous one; the first temp created
    // becomes the outermost result.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to the innermost temp's element type (or the original base
  // lvalue type when no temps were created).
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1108 
1109 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1110   const VarDecl *OrigVD = nullptr;
1111   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1112     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1113     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1114       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1115     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1116       Base = TempASE->getBase()->IgnoreParenImpCasts();
1117     DE = cast<DeclRefExpr>(Base);
1118     OrigVD = cast<VarDecl>(DE->getDecl());
1119   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1120     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1121     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1122       Base = TempASE->getBase()->IgnoreParenImpCasts();
1123     DE = cast<DeclRefExpr>(Base);
1124     OrigVD = cast<VarDecl>(DE->getDecl());
1125   }
1126   return OrigVD;
1127 }
1128 
/// Adjust \p PrivateAddr for reduction items expressed as array sections or
/// subscripts: apply to the private copy the same pointer offset the shared
/// section has from its base variable, then recast the result back to the
/// base lvalue's shape via castToBase. Plain items are returned unchanged.
/// In both cases the item's base VarDecl is recorded in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through any pointer/reference levels to reach the element type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the section start from the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the offset private pointer back into the base variable's shape.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1154 
1155 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1156   const OMPDeclareReductionDecl *DRD =
1157       getReductionInit(ClausesData[N].ReductionOp);
1158   return DRD && DRD->getInitializer();
1159 }
1160 
1161 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1162   return CGF.EmitLoadOfPointerLValue(
1163       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1164       getThreadIDVariable()->getType()->castAs<PointerType>());
1165 }
1166 
/// Emit the region body by invoking the stored CodeGen callback, wrapped in
/// a terminate scope so exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1179 
1180 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1181     CodeGenFunction &CGF) {
1182   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1183                             getThreadIDVariable()->getType(),
1184                             AlignmentSource::Decl);
1185 }
1186 
1187 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1188                                        QualType FieldTy) {
1189   auto *Field = FieldDecl::Create(
1190       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1191       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1192       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1193   Field->setAccess(AS_public);
1194   DC->addDecl(Field);
1195   return Field;
1196 }
1197 
/// Construct the runtime helper: build the implicit ident_t record used by
/// the OpenMP runtime ABI (field order below defines the layout and must not
/// change), the kmp critical-name array type, and load any offloading
/// metadata previously emitted into host bitcode.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name is an array of 8 i32s.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1223 
1224 void CGOpenMPRuntime::clear() {
1225   InternalVars.clear();
1226   // Clean non-target variable declarations possibly used only in debug info.
1227   for (const auto &Data : EmittedNonTargetVariables) {
1228     if (!Data.getValue().pointsToAliveValue())
1229       continue;
1230     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1231     if (!GV)
1232       continue;
1233     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1234       continue;
1235     GV->eraseFromParent();
1236   }
1237 }
1238 
1239 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1240   SmallString<128> Buffer;
1241   llvm::raw_svector_ostream OS(Buffer);
1242   StringRef Sep = FirstSeparator;
1243   for (StringRef Part : Parts) {
1244     OS << Sep << Part;
1245     Sep = Separator;
1246   }
1247   return OS.str();
1248 }
1249 
/// Emit an always-inline helper `void .omp_combiner.(Ty *omp_out, Ty *omp_in)`
/// (or `.omp_initializer.`) for a declare-reduction directive.
/// \param CombinerInitializer Expression emitted as the body; may be null
///        for initializers that only need \p Out's default initialization.
/// \param In Variable mapped onto the dereferenced second parameter.
/// \param Out Variable mapped onto the dereferenced first parameter.
/// \param IsCombiner Selects the symbol name and whether \p Out's own
///        initializer is emitted first (initializer case only).
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Force inlining: strip any noinline/optnone that the default internal
  // attributes may have added, then mark always_inline.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Initializer functions first emit Out's own non-trivial default
  // initializer, if any, before the explicit initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1304 
/// Emit (once per declaration) the combiner and, if present, initializer
/// functions for the declare-reduction \p D and cache them in UDRMap. When
/// called during function codegen (\p CGF non-null), also record \p D
/// against the current function for later cleanup.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Pass the init expression only for call-style initializers; otherwise
    // the helper relies on the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1330 
1331 std::pair<llvm::Function *, llvm::Function *>
1332 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1333   auto I = UDRMap.find(D);
1334   if (I != UDRMap.end())
1335     return I->second;
1336   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1337   return UDRMap.lookup(D);
1338 }
1339 
/// Outline the captured statement \p CS of a parallel/teams-flavored
/// directive into a function named via \p OutlinedHelperName.
/// \param ThreadIDVar Captured variable holding the thread id; must be of
///        pointer type (kmp_int32 *).
/// \param InnermostKind Kind of the innermost directive, forwarded to the
///        region info.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the region may contain a 'cancel' construct; each
  // parallel-flavored directive kind exposes this through hasCancel().
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  // Install the region info for the duration of outlining and generate the
  // outlined function from the captured statement.
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
1369 
1370 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1371     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1372     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1373   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1374   return emitParallelOrTeamsOutlinedFunction(
1375       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1376 }
1377 
1378 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1379     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1380     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1381   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1382   return emitParallelOrTeamsOutlinedFunction(
1383       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1384 }
1385 
/// Outline the captured region of a task/taskloop directive.
/// \param ThreadIDVar Captured thread-id variable; for tasks it holds the id
///        by value (non-pointer type).
/// \param PartIDVar Part-id variable used by the untied-task action.
/// \param TaskTVar Variable holding the kmp task descriptor pointer.
/// \param Tied False for untied tasks, in which case \p NumberOfParts is set
///        to the number of generated task parts.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task descriptor through
  // __kmpc_omp_task so the next part can be scheduled.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop; plain tasks under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' directives can carry 'cancel'.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1422 
1423 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1424                              const RecordDecl *RD, const CGRecordLayout &RL,
1425                              ArrayRef<llvm::Constant *> Data) {
1426   llvm::StructType *StructTy = RL.getLLVMType();
1427   unsigned PrevIdx = 0;
1428   ConstantInitBuilder CIBuilder(CGM);
1429   auto DI = Data.begin();
1430   for (const FieldDecl *FD : RD->fields()) {
1431     unsigned Idx = RL.getLLVMFieldNo(FD);
1432     // Fill the alignment.
1433     for (unsigned I = PrevIdx; I < Idx; ++I)
1434       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1435     PrevIdx = Idx + 1;
1436     Fields.add(*DI);
1437     ++DI;
1438   }
1439 }
1440 
1441 template <class... As>
1442 static llvm::GlobalVariable *
1443 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1444                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1445                    As &&... Args) {
1446   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1447   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1448   ConstantInitBuilder CIBuilder(CGM);
1449   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1450   buildStructValue(Fields, CGM, RD, RL, Data);
1451   return Fields.finishAndCreateGlobal(
1452       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1453       std::forward<As>(Args)...);
1454 }
1455 
1456 template <typename T>
1457 static void
1458 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1459                                          ArrayRef<llvm::Constant *> Data,
1460                                          T &Parent) {
1461   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1462   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1463   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1464   buildStructValue(Fields, CGM, RD, RL, Data);
1465   Fields.finishAndAddTo(Parent);
1466 }
1467 
/// Return the address of the default ident_t location object for the given
/// ident_t flags, creating and caching it (in OpenMPDefaultLocMap, keyed by
/// Flags) on first use.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field constants for the ident_t global: three zero-initialized i32
    // fields, the flags, and the default psource string.
    llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                              llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, /*IsConstant=*/false, Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // The global's address is not significant, so allow the linker to merge
    // identical copies.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1498 
1499 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1500                                              bool AtCurrentPoint) {
1501   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1502   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1503 
1504   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1505   if (AtCurrentPoint) {
1506     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1507         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1508   } else {
1509     Elem.second.ServiceInsertPt =
1510         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1511     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1512   }
1513 }
1514 
1515 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1516   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1517   if (Elem.second.ServiceInsertPt) {
1518     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1519     Elem.second.ServiceInsertPt = nullptr;
1520     Ptr->eraseFromParent();
1521   }
1522 }
1523 
/// Emit (or reuse) an ident_t* describing source location \p Loc with the
/// given ident_t \p Flags, for passing to OpenMP runtime entry points.
/// Without debug info (or with an invalid Loc) a shared default global is
/// returned; otherwise a per-function stack copy is kept and its psource
/// field is updated with the ";file;function;line;column;;" string for Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the stack slot at the function's service
    // insert point, so the copy dominates all uses in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached per raw source-location encoding in
  // OpenMPDebugLocMap and built lazily.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1584 
/// Return the OpenMP global thread id for the current function, reusing a
/// cached value when one exists. The id comes either from an outlined
/// region's thread-id argument or from a __kmpc_global_thread_num call
/// emitted at the function's service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point so the id dominates all uses,
  // then restore the builder's previous position.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1635 
1636 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1637   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1638   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1639     clearLocThreadIdInsertPt(CGF);
1640     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1641   }
1642   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1643     for(auto *D : FunctionUDRMap[CGF.CurFn])
1644       UDRMap.erase(D);
1645     FunctionUDRMap.erase(CGF.CurFn);
1646   }
1647 }
1648 
1649 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1650   return IdentTy->getPointerTo();
1651 }
1652 
1653 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1654   if (!Kmpc_MicroTy) {
1655     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1656     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1657                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1658     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1659   }
1660   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1661 }
1662 
1663 llvm::Constant *
1664 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1665   llvm::Constant *RTLFn = nullptr;
1666   switch (static_cast<OpenMPRTLFunction>(Function)) {
1667   case OMPRTL__kmpc_fork_call: {
1668     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1669     // microtask, ...);
1670     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1671                                 getKmpc_MicroPointerTy()};
1672     auto *FnTy =
1673         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1674     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1675     break;
1676   }
1677   case OMPRTL__kmpc_global_thread_num: {
1678     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1679     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1680     auto *FnTy =
1681         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1682     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1683     break;
1684   }
1685   case OMPRTL__kmpc_threadprivate_cached: {
1686     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1687     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1688     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1689                                 CGM.VoidPtrTy, CGM.SizeTy,
1690                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1691     auto *FnTy =
1692         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1693     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1694     break;
1695   }
1696   case OMPRTL__kmpc_critical: {
1697     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1698     // kmp_critical_name *crit);
1699     llvm::Type *TypeParams[] = {
1700         getIdentTyPointerTy(), CGM.Int32Ty,
1701         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1702     auto *FnTy =
1703         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1704     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1705     break;
1706   }
1707   case OMPRTL__kmpc_critical_with_hint: {
1708     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1709     // kmp_critical_name *crit, uintptr_t hint);
1710     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1711                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1712                                 CGM.IntPtrTy};
1713     auto *FnTy =
1714         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1715     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1716     break;
1717   }
1718   case OMPRTL__kmpc_threadprivate_register: {
1719     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1720     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1721     // typedef void *(*kmpc_ctor)(void *);
1722     auto *KmpcCtorTy =
1723         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1724                                 /*isVarArg*/ false)->getPointerTo();
1725     // typedef void *(*kmpc_cctor)(void *, void *);
1726     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1727     auto *KmpcCopyCtorTy =
1728         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1729                                 /*isVarArg*/ false)
1730             ->getPointerTo();
1731     // typedef void (*kmpc_dtor)(void *);
1732     auto *KmpcDtorTy =
1733         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1734             ->getPointerTo();
1735     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1736                               KmpcCopyCtorTy, KmpcDtorTy};
1737     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1738                                         /*isVarArg*/ false);
1739     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1740     break;
1741   }
1742   case OMPRTL__kmpc_end_critical: {
1743     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1744     // kmp_critical_name *crit);
1745     llvm::Type *TypeParams[] = {
1746         getIdentTyPointerTy(), CGM.Int32Ty,
1747         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1751     break;
1752   }
1753   case OMPRTL__kmpc_cancel_barrier: {
1754     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1755     // global_tid);
1756     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1757     auto *FnTy =
1758         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1759     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1760     break;
1761   }
1762   case OMPRTL__kmpc_barrier: {
1763     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1764     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1765     auto *FnTy =
1766         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1767     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1768     break;
1769   }
1770   case OMPRTL__kmpc_for_static_fini: {
1771     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1772     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1773     auto *FnTy =
1774         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1775     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1776     break;
1777   }
1778   case OMPRTL__kmpc_push_num_threads: {
1779     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1780     // kmp_int32 num_threads)
1781     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1782                                 CGM.Int32Ty};
1783     auto *FnTy =
1784         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1785     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1786     break;
1787   }
1788   case OMPRTL__kmpc_serialized_parallel: {
1789     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1790     // global_tid);
1791     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1792     auto *FnTy =
1793         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1794     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1795     break;
1796   }
1797   case OMPRTL__kmpc_end_serialized_parallel: {
1798     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1799     // global_tid);
1800     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1801     auto *FnTy =
1802         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1803     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1804     break;
1805   }
1806   case OMPRTL__kmpc_flush: {
1807     // Build void __kmpc_flush(ident_t *loc);
1808     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1809     auto *FnTy =
1810         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1811     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1812     break;
1813   }
1814   case OMPRTL__kmpc_master: {
1815     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1816     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1817     auto *FnTy =
1818         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1819     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1820     break;
1821   }
1822   case OMPRTL__kmpc_end_master: {
1823     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1825     auto *FnTy =
1826         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1827     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1828     break;
1829   }
1830   case OMPRTL__kmpc_omp_taskyield: {
1831     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1832     // int end_part);
1833     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1834     auto *FnTy =
1835         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1836     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1837     break;
1838   }
1839   case OMPRTL__kmpc_single: {
1840     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1841     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1842     auto *FnTy =
1843         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1844     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1845     break;
1846   }
1847   case OMPRTL__kmpc_end_single: {
1848     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1849     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1850     auto *FnTy =
1851         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1852     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1853     break;
1854   }
1855   case OMPRTL__kmpc_omp_task_alloc: {
1856     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1857     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1858     // kmp_routine_entry_t *task_entry);
1859     assert(KmpRoutineEntryPtrTy != nullptr &&
1860            "Type kmp_routine_entry_t must be created.");
1861     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1862                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1863     // Return void * and then cast to particular kmp_task_t type.
1864     auto *FnTy =
1865         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1866     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1867     break;
1868   }
1869   case OMPRTL__kmpc_omp_task: {
1870     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1871     // *new_task);
1872     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1873                                 CGM.VoidPtrTy};
1874     auto *FnTy =
1875         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1877     break;
1878   }
1879   case OMPRTL__kmpc_copyprivate: {
1880     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1881     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1882     // kmp_int32 didit);
1883     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1884     auto *CpyFnTy =
1885         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1886     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1887                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1888                                 CGM.Int32Ty};
1889     auto *FnTy =
1890         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1891     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1892     break;
1893   }
1894   case OMPRTL__kmpc_reduce: {
1895     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1896     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1897     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1898     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1899     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1900                                                /*isVarArg=*/false);
1901     llvm::Type *TypeParams[] = {
1902         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1903         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1904         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1905     auto *FnTy =
1906         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1907     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1908     break;
1909   }
1910   case OMPRTL__kmpc_reduce_nowait: {
1911     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1912     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1913     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1914     // *lck);
1915     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1916     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1917                                                /*isVarArg=*/false);
1918     llvm::Type *TypeParams[] = {
1919         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1920         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1921         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1922     auto *FnTy =
1923         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1924     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1925     break;
1926   }
1927   case OMPRTL__kmpc_end_reduce: {
1928     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1929     // kmp_critical_name *lck);
1930     llvm::Type *TypeParams[] = {
1931         getIdentTyPointerTy(), CGM.Int32Ty,
1932         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1933     auto *FnTy =
1934         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1935     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1936     break;
1937   }
1938   case OMPRTL__kmpc_end_reduce_nowait: {
1939     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1940     // kmp_critical_name *lck);
1941     llvm::Type *TypeParams[] = {
1942         getIdentTyPointerTy(), CGM.Int32Ty,
1943         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1944     auto *FnTy =
1945         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1946     RTLFn =
1947         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1948     break;
1949   }
1950   case OMPRTL__kmpc_omp_task_begin_if0: {
1951     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1952     // *new_task);
1953     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1954                                 CGM.VoidPtrTy};
1955     auto *FnTy =
1956         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1957     RTLFn =
1958         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1959     break;
1960   }
1961   case OMPRTL__kmpc_omp_task_complete_if0: {
1962     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1963     // *new_task);
1964     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1965                                 CGM.VoidPtrTy};
1966     auto *FnTy =
1967         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1968     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1969                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1970     break;
1971   }
1972   case OMPRTL__kmpc_ordered: {
1973     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1974     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1975     auto *FnTy =
1976         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1977     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1978     break;
1979   }
1980   case OMPRTL__kmpc_end_ordered: {
1981     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1982     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1983     auto *FnTy =
1984         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1985     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1986     break;
1987   }
1988   case OMPRTL__kmpc_omp_taskwait: {
1989     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1990     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1991     auto *FnTy =
1992         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1993     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1994     break;
1995   }
1996   case OMPRTL__kmpc_taskgroup: {
1997     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1998     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1999     auto *FnTy =
2000         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2001     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2002     break;
2003   }
2004   case OMPRTL__kmpc_end_taskgroup: {
2005     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2006     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2007     auto *FnTy =
2008         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2009     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2010     break;
2011   }
2012   case OMPRTL__kmpc_push_proc_bind: {
2013     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2014     // int proc_bind)
2015     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2018     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2019     break;
2020   }
2021   case OMPRTL__kmpc_omp_task_with_deps: {
2022     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2023     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2024     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2025     llvm::Type *TypeParams[] = {
2026         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2027         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2028     auto *FnTy =
2029         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2030     RTLFn =
2031         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2032     break;
2033   }
2034   case OMPRTL__kmpc_omp_wait_deps: {
2035     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2036     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2037     // kmp_depend_info_t *noalias_dep_list);
2038     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2039                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2040                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2041     auto *FnTy =
2042         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2043     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2044     break;
2045   }
2046   case OMPRTL__kmpc_cancellationpoint: {
2047     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2048     // global_tid, kmp_int32 cncl_kind)
2049     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2050     auto *FnTy =
2051         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2052     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2053     break;
2054   }
2055   case OMPRTL__kmpc_cancel: {
2056     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2057     // kmp_int32 cncl_kind)
2058     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2059     auto *FnTy =
2060         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2061     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2062     break;
2063   }
2064   case OMPRTL__kmpc_push_num_teams: {
2065     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2066     // kmp_int32 num_teams, kmp_int32 num_threads)
2067     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2068         CGM.Int32Ty};
2069     auto *FnTy =
2070         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2071     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2072     break;
2073   }
2074   case OMPRTL__kmpc_fork_teams: {
2075     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2076     // microtask, ...);
2077     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2078                                 getKmpc_MicroPointerTy()};
2079     auto *FnTy =
2080         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2081     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2082     break;
2083   }
2084   case OMPRTL__kmpc_taskloop: {
2085     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2086     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2087     // sched, kmp_uint64 grainsize, void *task_dup);
2088     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2089                                 CGM.IntTy,
2090                                 CGM.VoidPtrTy,
2091                                 CGM.IntTy,
2092                                 CGM.Int64Ty->getPointerTo(),
2093                                 CGM.Int64Ty->getPointerTo(),
2094                                 CGM.Int64Ty,
2095                                 CGM.IntTy,
2096                                 CGM.IntTy,
2097                                 CGM.Int64Ty,
2098                                 CGM.VoidPtrTy};
2099     auto *FnTy =
2100         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2102     break;
2103   }
2104   case OMPRTL__kmpc_doacross_init: {
2105     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2106     // num_dims, struct kmp_dim *dims);
2107     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2108                                 CGM.Int32Ty,
2109                                 CGM.Int32Ty,
2110                                 CGM.VoidPtrTy};
2111     auto *FnTy =
2112         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2113     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2114     break;
2115   }
2116   case OMPRTL__kmpc_doacross_fini: {
2117     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2118     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2119     auto *FnTy =
2120         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2121     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2122     break;
2123   }
2124   case OMPRTL__kmpc_doacross_post: {
2125     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2126     // *vec);
2127     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2128                                 CGM.Int64Ty->getPointerTo()};
2129     auto *FnTy =
2130         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2131     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2132     break;
2133   }
2134   case OMPRTL__kmpc_doacross_wait: {
2135     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2136     // *vec);
2137     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2138                                 CGM.Int64Ty->getPointerTo()};
2139     auto *FnTy =
2140         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2141     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2142     break;
2143   }
2144   case OMPRTL__kmpc_task_reduction_init: {
2145     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2146     // *data);
2147     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2148     auto *FnTy =
2149         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2150     RTLFn =
2151         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2152     break;
2153   }
2154   case OMPRTL__kmpc_task_reduction_get_th_data: {
2155     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2156     // *d);
2157     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2158     auto *FnTy =
2159         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2160     RTLFn = CGM.CreateRuntimeFunction(
2161         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2162     break;
2163   }
2164   case OMPRTL__tgt_target: {
2165     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2166     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2167     // *arg_types);
2168     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2169                                 CGM.VoidPtrTy,
2170                                 CGM.Int32Ty,
2171                                 CGM.VoidPtrPtrTy,
2172                                 CGM.VoidPtrPtrTy,
2173                                 CGM.SizeTy->getPointerTo(),
2174                                 CGM.Int64Ty->getPointerTo()};
2175     auto *FnTy =
2176         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2177     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2178     break;
2179   }
2180   case OMPRTL__tgt_target_nowait: {
2181     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2182     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2183     // int64_t *arg_types);
2184     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2185                                 CGM.VoidPtrTy,
2186                                 CGM.Int32Ty,
2187                                 CGM.VoidPtrPtrTy,
2188                                 CGM.VoidPtrPtrTy,
2189                                 CGM.SizeTy->getPointerTo(),
2190                                 CGM.Int64Ty->getPointerTo()};
2191     auto *FnTy =
2192         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2193     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2194     break;
2195   }
2196   case OMPRTL__tgt_target_teams: {
2197     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2198     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2199     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2200     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2201                                 CGM.VoidPtrTy,
2202                                 CGM.Int32Ty,
2203                                 CGM.VoidPtrPtrTy,
2204                                 CGM.VoidPtrPtrTy,
2205                                 CGM.SizeTy->getPointerTo(),
2206                                 CGM.Int64Ty->getPointerTo(),
2207                                 CGM.Int32Ty,
2208                                 CGM.Int32Ty};
2209     auto *FnTy =
2210         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2211     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2212     break;
2213   }
2214   case OMPRTL__tgt_target_teams_nowait: {
2215     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2216     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2217     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2218     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2219                                 CGM.VoidPtrTy,
2220                                 CGM.Int32Ty,
2221                                 CGM.VoidPtrPtrTy,
2222                                 CGM.VoidPtrPtrTy,
2223                                 CGM.SizeTy->getPointerTo(),
2224                                 CGM.Int64Ty->getPointerTo(),
2225                                 CGM.Int32Ty,
2226                                 CGM.Int32Ty};
2227     auto *FnTy =
2228         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2230     break;
2231   }
2232   case OMPRTL__tgt_register_lib: {
2233     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2234     QualType ParamTy =
2235         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2236     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2237     auto *FnTy =
2238         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2239     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2240     break;
2241   }
2242   case OMPRTL__tgt_unregister_lib: {
2243     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2244     QualType ParamTy =
2245         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2246     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2247     auto *FnTy =
2248         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2249     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2250     break;
2251   }
2252   case OMPRTL__tgt_target_data_begin: {
2253     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2254     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2255     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2256                                 CGM.Int32Ty,
2257                                 CGM.VoidPtrPtrTy,
2258                                 CGM.VoidPtrPtrTy,
2259                                 CGM.SizeTy->getPointerTo(),
2260                                 CGM.Int64Ty->getPointerTo()};
2261     auto *FnTy =
2262         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2263     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2264     break;
2265   }
2266   case OMPRTL__tgt_target_data_begin_nowait: {
2267     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2268     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2269     // *arg_types);
2270     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2271                                 CGM.Int32Ty,
2272                                 CGM.VoidPtrPtrTy,
2273                                 CGM.VoidPtrPtrTy,
2274                                 CGM.SizeTy->getPointerTo(),
2275                                 CGM.Int64Ty->getPointerTo()};
2276     auto *FnTy =
2277         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2278     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2279     break;
2280   }
2281   case OMPRTL__tgt_target_data_end: {
2282     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2283     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2284     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2285                                 CGM.Int32Ty,
2286                                 CGM.VoidPtrPtrTy,
2287                                 CGM.VoidPtrPtrTy,
2288                                 CGM.SizeTy->getPointerTo(),
2289                                 CGM.Int64Ty->getPointerTo()};
2290     auto *FnTy =
2291         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2292     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2293     break;
2294   }
2295   case OMPRTL__tgt_target_data_end_nowait: {
2296     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2297     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2298     // *arg_types);
2299     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2300                                 CGM.Int32Ty,
2301                                 CGM.VoidPtrPtrTy,
2302                                 CGM.VoidPtrPtrTy,
2303                                 CGM.SizeTy->getPointerTo(),
2304                                 CGM.Int64Ty->getPointerTo()};
2305     auto *FnTy =
2306         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2307     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2308     break;
2309   }
2310   case OMPRTL__tgt_target_data_update: {
2311     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2312     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2313     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2314                                 CGM.Int32Ty,
2315                                 CGM.VoidPtrPtrTy,
2316                                 CGM.VoidPtrPtrTy,
2317                                 CGM.SizeTy->getPointerTo(),
2318                                 CGM.Int64Ty->getPointerTo()};
2319     auto *FnTy =
2320         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2321     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2322     break;
2323   }
2324   case OMPRTL__tgt_target_data_update_nowait: {
2325     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2326     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2327     // *arg_types);
2328     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2329                                 CGM.Int32Ty,
2330                                 CGM.VoidPtrPtrTy,
2331                                 CGM.VoidPtrPtrTy,
2332                                 CGM.SizeTy->getPointerTo(),
2333                                 CGM.Int64Ty->getPointerTo()};
2334     auto *FnTy =
2335         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2336     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2337     break;
2338   }
2339   }
2340   assert(RTLFn && "Unable to find OpenMP runtime function");
2341   return RTLFn;
2342 }
2343 
2344 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2345                                                              bool IVSigned) {
2346   assert((IVSize == 32 || IVSize == 64) &&
2347          "IV size is not compatible with the omp runtime");
2348   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2349                                             : "__kmpc_for_static_init_4u")
2350                                 : (IVSigned ? "__kmpc_for_static_init_8"
2351                                             : "__kmpc_for_static_init_8u");
2352   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2353   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2354   llvm::Type *TypeParams[] = {
2355     getIdentTyPointerTy(),                     // loc
2356     CGM.Int32Ty,                               // tid
2357     CGM.Int32Ty,                               // schedtype
2358     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2359     PtrTy,                                     // p_lower
2360     PtrTy,                                     // p_upper
2361     PtrTy,                                     // p_stride
2362     ITy,                                       // incr
2363     ITy                                        // chunk
2364   };
2365   auto *FnTy =
2366       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2367   return CGM.CreateRuntimeFunction(FnTy, Name);
2368 }
2369 
2370 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2371                                                             bool IVSigned) {
2372   assert((IVSize == 32 || IVSize == 64) &&
2373          "IV size is not compatible with the omp runtime");
2374   StringRef Name =
2375       IVSize == 32
2376           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2377           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2378   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2379   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2380                                CGM.Int32Ty,           // tid
2381                                CGM.Int32Ty,           // schedtype
2382                                ITy,                   // lower
2383                                ITy,                   // upper
2384                                ITy,                   // stride
2385                                ITy                    // chunk
2386   };
2387   auto *FnTy =
2388       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2389   return CGM.CreateRuntimeFunction(FnTy, Name);
2390 }
2391 
2392 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2393                                                             bool IVSigned) {
2394   assert((IVSize == 32 || IVSize == 64) &&
2395          "IV size is not compatible with the omp runtime");
2396   StringRef Name =
2397       IVSize == 32
2398           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2399           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2400   llvm::Type *TypeParams[] = {
2401       getIdentTyPointerTy(), // loc
2402       CGM.Int32Ty,           // tid
2403   };
2404   auto *FnTy =
2405       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2406   return CGM.CreateRuntimeFunction(FnTy, Name);
2407 }
2408 
2409 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2410                                                             bool IVSigned) {
2411   assert((IVSize == 32 || IVSize == 64) &&
2412          "IV size is not compatible with the omp runtime");
2413   StringRef Name =
2414       IVSize == 32
2415           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2416           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2417   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2418   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2419   llvm::Type *TypeParams[] = {
2420     getIdentTyPointerTy(),                     // loc
2421     CGM.Int32Ty,                               // tid
2422     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2423     PtrTy,                                     // p_lower
2424     PtrTy,                                     // p_upper
2425     PtrTy                                      // p_stride
2426   };
2427   auto *FnTy =
2428       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2429   return CGM.CreateRuntimeFunction(FnTy, Name);
2430 }
2431 
2432 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2433   if (CGM.getLangOpts().OpenMPSimd)
2434     return Address::invalid();
2435   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2436       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2437   if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2438     SmallString<64> PtrName;
2439     {
2440       llvm::raw_svector_ostream OS(PtrName);
2441       OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2442     }
2443     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2444     if (!Ptr) {
2445       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2446       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2447                                         PtrName);
2448       if (!CGM.getLangOpts().OpenMPIsDevice) {
2449         auto *GV = cast<llvm::GlobalVariable>(Ptr);
2450         GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2451         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2452       }
2453       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2454       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2455     }
2456     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2457   }
2458   return Address::invalid();
2459 }
2460 
2461 llvm::Constant *
2462 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2463   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2464          !CGM.getContext().getTargetInfo().isTLSSupported());
2465   // Lookup the entry, lazily creating it if necessary.
2466   std::string Suffix = getName({"cache", ""});
2467   return getOrCreateInternalVariable(
2468       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2469 }
2470 
2471 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2472                                                 const VarDecl *VD,
2473                                                 Address VDAddr,
2474                                                 SourceLocation Loc) {
2475   if (CGM.getLangOpts().OpenMPUseTLS &&
2476       CGM.getContext().getTargetInfo().isTLSSupported())
2477     return VDAddr;
2478 
2479   llvm::Type *VarTy = VDAddr.getElementType();
2480   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2481                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2482                                                        CGM.Int8PtrTy),
2483                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2484                          getOrCreateThreadPrivateCache(VD)};
2485   return Address(CGF.EmitRuntimeCall(
2486       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2487                  VDAddr.getAlignment());
2488 }
2489 
2490 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2491     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2492     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2493   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2494   // library.
2495   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2496   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2497                       OMPLoc);
2498   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2499   // to register constructor/destructor for variable.
2500   llvm::Value *Args[] = {
2501       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2502       Ctor, CopyCtor, Dtor};
2503   CGF.EmitRuntimeCall(
2504       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2505 }
2506 
/// Emits (at most once per variable) the helper functions that construct and
/// destroy the threadprivate copy of \p VD and registers them with the OpenMP
/// runtime via __kmpc_threadprivate_register.
///
/// \param VD          The threadprivate variable.
/// \param VDAddr      Address of the original (master) copy of the variable.
/// \param Loc         Source location used for debug info and runtime idents.
/// \param PerformInit True if the threadprivate copies must be initialized
///                    from the declaration's initializer.
/// \param CGF         If non-null, the registration is emitted into this
///                    function; otherwise a standalone initialization function
///                    is created and returned.
/// \return The standalone initialization function, or nullptr if none was
///         needed (TLS is used, no ctor/dtor is required, or \p CGF was
///         provided).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when the variable is backed by real TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Register only the definition, and only once per mangled name.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // (shape: void *__kmpc_global_ctor_(void *dst)).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination address passed by the runtime and emit the
      // initializer into it, reinterpreted as the variable's type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns its argument (the address of the threadprivate copy).
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // (shape: void __kmpc_global_dtor_(void *addr)).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime expects typed null function pointers for the hooks that were
    // not generated above.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: emit a standalone
      // "__omp_threadprivate_init_" function that performs the registration
      // and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2626 
2627 /// Obtain information that uniquely identifies a target entry. This
2628 /// consists of the file and device IDs as well as line number associated with
2629 /// the relevant entry source location.
2630 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2631                                      unsigned &DeviceID, unsigned &FileID,
2632                                      unsigned &LineNum) {
2633   SourceManager &SM = C.getSourceManager();
2634 
2635   // The loc should be always valid and have a file ID (the user cannot use
2636   // #pragma directives in macros)
2637 
2638   assert(Loc.isValid() && "Source location is expected to be always valid.");
2639 
2640   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2641   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2642 
2643   llvm::sys::fs::UniqueID ID;
2644   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2645     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2646         << PLoc.getFilename() << EC.message();
2647 
2648   DeviceID = ID.getDevice();
2649   FileID = ID.getFile();
2650   LineNum = PLoc.getLine();
2651 }
2652 
2653 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2654                                                      llvm::GlobalVariable *Addr,
2655                                                      bool PerformInit) {
2656   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2657       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2658   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2659     return CGM.getLangOpts().OpenMPIsDevice;
2660   VD = VD->getDefinition(CGM.getContext());
2661   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2662     return CGM.getLangOpts().OpenMPIsDevice;
2663 
2664   QualType ASTTy = VD->getType();
2665 
2666   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2667   // Produce the unique prefix to identify the new target regions. We use
2668   // the source location of the variable declaration which we know to not
2669   // conflict with any target region.
2670   unsigned DeviceID;
2671   unsigned FileID;
2672   unsigned Line;
2673   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2674   SmallString<128> Buffer, Out;
2675   {
2676     llvm::raw_svector_ostream OS(Buffer);
2677     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2678        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2679   }
2680 
2681   const Expr *Init = VD->getAnyInitializer();
2682   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2683     llvm::Constant *Ctor;
2684     llvm::Constant *ID;
2685     if (CGM.getLangOpts().OpenMPIsDevice) {
2686       // Generate function that re-emits the declaration's initializer into
2687       // the threadprivate copy of the variable VD
2688       CodeGenFunction CtorCGF(CGM);
2689 
2690       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2691       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2692       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2693           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2694       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2695       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2696                             FunctionArgList(), Loc, Loc);
2697       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2698       CtorCGF.EmitAnyExprToMem(Init,
2699                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2700                                Init->getType().getQualifiers(),
2701                                /*IsInitializer=*/true);
2702       CtorCGF.FinishFunction();
2703       Ctor = Fn;
2704       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2705       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2706     } else {
2707       Ctor = new llvm::GlobalVariable(
2708           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2709           llvm::GlobalValue::PrivateLinkage,
2710           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2711       ID = Ctor;
2712     }
2713 
2714     // Register the information for the entry associated with the constructor.
2715     Out.clear();
2716     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2717         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2718         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2719   }
2720   if (VD->getType().isDestructedType() != QualType::DK_none) {
2721     llvm::Constant *Dtor;
2722     llvm::Constant *ID;
2723     if (CGM.getLangOpts().OpenMPIsDevice) {
2724       // Generate function that emits destructor call for the threadprivate
2725       // copy of the variable VD
2726       CodeGenFunction DtorCGF(CGM);
2727 
2728       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2729       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2730       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2731           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2732       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2733       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2734                             FunctionArgList(), Loc, Loc);
2735       // Create a scope with an artificial location for the body of this
2736       // function.
2737       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2738       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2739                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2740                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2741       DtorCGF.FinishFunction();
2742       Dtor = Fn;
2743       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2744       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2745     } else {
2746       Dtor = new llvm::GlobalVariable(
2747           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2748           llvm::GlobalValue::PrivateLinkage,
2749           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2750       ID = Dtor;
2751     }
2752     // Register the information for the entry associated with the destructor.
2753     Out.clear();
2754     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2755         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2756         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2757   }
2758   return CGM.getLangOpts().OpenMPIsDevice;
2759 }
2760 
2761 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2762                                                           QualType VarType,
2763                                                           StringRef Name) {
2764   std::string Suffix = getName({"artificial", ""});
2765   std::string CacheSuffix = getName({"cache", ""});
2766   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2767   llvm::Value *GAddr =
2768       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2769   llvm::Value *Args[] = {
2770       emitUpdateLocation(CGF, SourceLocation()),
2771       getThreadID(CGF, SourceLocation()),
2772       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2773       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2774                                 /*IsSigned=*/false),
2775       getOrCreateInternalVariable(
2776           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2777   return Address(
2778       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2779           CGF.EmitRuntimeCall(
2780               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2781           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2782       CGM.getPointerAlign());
2783 }
2784 
2785 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2786                                       const RegionCodeGenTy &ThenGen,
2787                                       const RegionCodeGenTy &ElseGen) {
2788   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2789 
2790   // If the condition constant folds and can be elided, try to avoid emitting
2791   // the condition and the dead arm of the if/else.
2792   bool CondConstant;
2793   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2794     if (CondConstant)
2795       ThenGen(CGF);
2796     else
2797       ElseGen(CGF);
2798     return;
2799   }
2800 
2801   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2802   // emit the conditional branch.
2803   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2804   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2805   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2806   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2807 
2808   // Emit the 'then' code.
2809   CGF.EmitBlock(ThenBlock);
2810   ThenGen(CGF);
2811   CGF.EmitBranch(ContBlock);
2812   // Emit the 'else' code if present.
2813   // There is no need to emit line number for unconditional branch.
2814   (void)ApplyDebugLocation::CreateEmpty(CGF);
2815   CGF.EmitBlock(ElseBlock);
2816   ElseGen(CGF);
2817   // There is no need to emit line number for unconditional branch.
2818   (void)ApplyDebugLocation::CreateEmpty(CGF);
2819   CGF.EmitBranch(ContBlock);
2820   // Emit the continuation block for code after the if.
2821   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2822 }
2823 
/// Emit code for a 'parallel' region: either a __kmpc_fork_call that runs
/// \p OutlinedFn on a team, or — when the 'if' clause condition is false —
/// a serialized execution of \p OutlinedFn on the current thread bracketed
/// by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: fork a team via the runtime.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    // The captured variables are appended as trailing varargs.
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function directly on this thread.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an 'if' clause the choice is made at runtime (or folded away);
  // otherwise always fork.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2880 
2881 // If we're inside an (outlined) parallel region, use the region info's
2882 // thread-ID variable (it is passed in a first argument of the outlined function
2883 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2884 // regular serial code region, get thread ID by calling kmp_int32
2885 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2886 // return the address of that temp.
2887 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2888                                              SourceLocation Loc) {
2889   if (auto *OMPRegionInfo =
2890           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2891     if (OMPRegionInfo->getThreadIDVariable())
2892       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2893 
2894   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2895   QualType Int32Ty =
2896       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2897   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2898   CGF.EmitStoreOfScalar(ThreadID,
2899                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2900 
2901   return ThreadIDTemp;
2902 }
2903 
/// Return an internal global of type \p Ty named \p Name, creating a
/// zero-initialized CommonLinkage global on the first request. Results are
/// cached in InternalVars so repeated requests for the same name yield the
/// same global.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  // Render the (possibly multi-part) Twine into a flat name.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace inserts a null mapping when the name is new; a non-null
  // value means the variable was created by an earlier call.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // First request: create the global and cache it. Elem.first() (the map
  // key) owns the name storage, so the global's name stays valid.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}
2923 
2924 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2925   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2926   std::string Name = getName({Prefix, "var"});
2927   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2928 }
2929 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to EnterCallee(EnterArgs) at region entry and Exit()
/// emits ExitCallee(ExitArgs) at region exit. When Conditional is true, the
/// region body is guarded on the enter call returning non-zero, and the
/// caller must invoke Done() after emitting the body to close the guard.
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;              // Runtime fn called at region entry.
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;               // Runtime fn called at region exit.
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;                      // Guard body on enter-call result?
  llvm::BasicBlock *ContBlock = nullptr; // Join block of the guarded form.

public:
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Must only be called after a Conditional Enter(): it relies on ContBlock,
  // which is set there.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2967 
2968 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2969                                          StringRef CriticalName,
2970                                          const RegionCodeGenTy &CriticalOpGen,
2971                                          SourceLocation Loc, const Expr *Hint) {
2972   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2973   // CriticalOpGen();
2974   // __kmpc_end_critical(ident_t *, gtid, Lock);
2975   // Prepare arguments and build a call to __kmpc_critical
2976   if (!CGF.HaveInsertPoint())
2977     return;
2978   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2979                          getCriticalRegionLock(CriticalName)};
2980   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2981                                                 std::end(Args));
2982   if (Hint) {
2983     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2984         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2985   }
2986   CommonActionTy Action(
2987       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2988                                  : OMPRTL__kmpc_critical),
2989       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2990   CriticalOpGen.setAction(Action);
2991   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2992 }
2993 
2994 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2995                                        const RegionCodeGenTy &MasterOpGen,
2996                                        SourceLocation Loc) {
2997   if (!CGF.HaveInsertPoint())
2998     return;
2999   // if(__kmpc_master(ident_t *, gtid)) {
3000   //   MasterOpGen();
3001   //   __kmpc_end_master(ident_t *, gtid);
3002   // }
3003   // Prepare arguments and build a call to __kmpc_master
3004   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3005   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3006                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3007                         /*Conditional=*/true);
3008   MasterOpGen.setAction(Action);
3009   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3010   Action.Done(CGF);
3011 }
3012 
3013 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3014                                         SourceLocation Loc) {
3015   if (!CGF.HaveInsertPoint())
3016     return;
3017   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3018   llvm::Value *Args[] = {
3019       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3020       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3021   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3022   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3023     Region->emitUntiedSwitch(CGF);
3024 }
3025 
3026 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3027                                           const RegionCodeGenTy &TaskgroupOpGen,
3028                                           SourceLocation Loc) {
3029   if (!CGF.HaveInsertPoint())
3030     return;
3031   // __kmpc_taskgroup(ident_t *, gtid);
3032   // TaskgroupOpGen();
3033   // __kmpc_end_taskgroup(ident_t *, gtid);
3034   // Prepare arguments and build a call to __kmpc_taskgroup
3035   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3036   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3037                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3038                         Args);
3039   TaskgroupOpGen.setAction(Action);
3040   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3041 }
3042 
3043 /// Given an array of pointers to variables, project the address of a
3044 /// given variable.
3045 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3046                                       unsigned Index, const VarDecl *Var) {
3047   // Pull out the pointer to the variable.
3048   Address PtrAddr =
3049       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
3050   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3051 
3052   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3053   Addr = CGF.Builder.CreateElementBitCast(
3054       Addr, CGF.ConvertTypeForMem(Var->getType()));
3055   return Addr;
3056 }
3057 
/// Emit the helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* of type \p ArgsType — one slot
/// per variable in \p CopyprivateVars — and the body performs the
/// per-variable assignment \p AssignmentOps[I] between the pointed-to
/// objects.
/// NOTE(review): the call site in emitSingleRegion passes its SrcExprs for
/// \p DestExprs and its DstExprs for \p SrcExprs — confirm which side is
/// the copy destination before renaming anything here.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Project the I-th variable's typed address out of each pointer array.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3111 
/// Emit a 'single' region: the body is executed by the first thread to reach
/// it; when copyprivate variables are present, their values are then
/// broadcast from that thread to the rest of the team via
/// __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays are parallel: entry I of each describes one copyprivate
  // variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // did_it records whether this thread executed the single region and is
    // therefore the broadcast source.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Note: this store is emitted before Action.Done(CGF), i.e. inside the
    // guarded "then" arm, so only the executing thread sets it.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3193 
3194 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3195                                         const RegionCodeGenTy &OrderedOpGen,
3196                                         SourceLocation Loc, bool IsThreads) {
3197   if (!CGF.HaveInsertPoint())
3198     return;
3199   // __kmpc_ordered(ident_t *, gtid);
3200   // OrderedOpGen();
3201   // __kmpc_end_ordered(ident_t *, gtid);
3202   // Prepare arguments and build a call to __kmpc_ordered
3203   if (IsThreads) {
3204     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3205     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3206                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3207                           Args);
3208     OrderedOpGen.setAction(Action);
3209     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3210     return;
3211   }
3212   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3213 }
3214 
3215 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3216                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3217                                       bool ForceSimpleCall) {
3218   if (!CGF.HaveInsertPoint())
3219     return;
3220   // Build call __kmpc_cancel_barrier(loc, thread_id);
3221   // Build call __kmpc_barrier(loc, thread_id);
3222   unsigned Flags;
3223   if (Kind == OMPD_for)
3224     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3225   else if (Kind == OMPD_sections)
3226     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3227   else if (Kind == OMPD_single)
3228     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3229   else if (Kind == OMPD_barrier)
3230     Flags = OMP_IDENT_BARRIER_EXPL;
3231   else
3232     Flags = OMP_IDENT_BARRIER_IMPL;
3233   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3234   // thread_id);
3235   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3236                          getThreadID(CGF, Loc)};
3237   if (auto *OMPRegionInfo =
3238           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3239     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3240       llvm::Value *Result = CGF.EmitRuntimeCall(
3241           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3242       if (EmitChecks) {
3243         // if (__kmpc_cancel_barrier()) {
3244         //   exit from construct;
3245         // }
3246         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3247         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3248         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3249         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3250         CGF.EmitBlock(ExitBB);
3251         //   exit from construct;
3252         CodeGenFunction::JumpDest CancelDestination =
3253             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3254         CGF.EmitBranchThroughCleanup(CancelDestination);
3255         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3256       }
3257       return;
3258     }
3259   }
3260   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3261 }
3262 
3263 /// Map the OpenMP loop schedule to the runtime enumeration.
3264 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3265                                           bool Chunked, bool Ordered) {
3266   switch (ScheduleKind) {
3267   case OMPC_SCHEDULE_static:
3268     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3269                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3270   case OMPC_SCHEDULE_dynamic:
3271     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3272   case OMPC_SCHEDULE_guided:
3273     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3274   case OMPC_SCHEDULE_runtime:
3275     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3276   case OMPC_SCHEDULE_auto:
3277     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3278   case OMPC_SCHEDULE_unknown:
3279     assert(!Chunked && "chunk was specified but schedule kind not known");
3280     return Ordered ? OMP_ord_static : OMP_sch_static;
3281   }
3282   llvm_unreachable("Unexpected runtime schedule");
3283 }
3284 
3285 /// Map the OpenMP distribute schedule to the runtime enumeration.
3286 static OpenMPSchedType
3287 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3288   // only static is allowed for dist_schedule
3289   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3290 }
3291 
3292 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3293                                          bool Chunked) const {
3294   OpenMPSchedType Schedule =
3295       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3296   return Schedule == OMP_sch_static;
3297 }
3298 
3299 bool CGOpenMPRuntime::isStaticNonchunked(
3300     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3301   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3302   return Schedule == OMP_dist_sch_static;
3303 }
3304 
3305 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3306                                       bool Chunked) const {
3307   OpenMPSchedType Schedule =
3308       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3309   return Schedule == OMP_sch_static_chunked;
3310 }
3311 
3312 bool CGOpenMPRuntime::isStaticChunked(
3313     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3314   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3315   return Schedule == OMP_dist_sch_static_chunked;
3316 }
3317 
3318 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3319   OpenMPSchedType Schedule =
3320       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3321   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3322   return Schedule != OMP_sch_static;
3323 }
3324 
3325 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3326                                   OpenMPScheduleClauseModifier M1,
3327                                   OpenMPScheduleClauseModifier M2) {
3328   int Modifier = 0;
3329   switch (M1) {
3330   case OMPC_SCHEDULE_MODIFIER_monotonic:
3331     Modifier = OMP_sch_modifier_monotonic;
3332     break;
3333   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3334     Modifier = OMP_sch_modifier_nonmonotonic;
3335     break;
3336   case OMPC_SCHEDULE_MODIFIER_simd:
3337     if (Schedule == OMP_sch_static_chunked)
3338       Schedule = OMP_sch_static_balanced_chunked;
3339     break;
3340   case OMPC_SCHEDULE_MODIFIER_last:
3341   case OMPC_SCHEDULE_MODIFIER_unknown:
3342     break;
3343   }
3344   switch (M2) {
3345   case OMPC_SCHEDULE_MODIFIER_monotonic:
3346     Modifier = OMP_sch_modifier_monotonic;
3347     break;
3348   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3349     Modifier = OMP_sch_modifier_nonmonotonic;
3350     break;
3351   case OMPC_SCHEDULE_MODIFIER_simd:
3352     if (Schedule == OMP_sch_static_chunked)
3353       Schedule = OMP_sch_static_balanced_chunked;
3354     break;
3355   case OMPC_SCHEDULE_MODIFIER_last:
3356   case OMPC_SCHEDULE_MODIFIER_unknown:
3357     break;
3358   }
3359   return Schedule | Modifier;
3360 }
3361 
/// Emit the runtime initialization call for a dispatch-based (dynamic or
/// ordered) worksharing loop via the size/signedness-specific
/// __kmpc_dispatch_init entry point.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Nothing to emit if the IR builder has no valid insertion point
  // (e.g. the block is unreachable).
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules must go through __kmpc_for_static_init
  // instead of the dispatch machinery.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3393 
/// Shared helper that emits the __kmpc_for_static_init_* call used by both
/// worksharing ('for'/'sections') and 'distribute' initialization. Only valid
/// for the static family of runtime schedules listed in the asserts below.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  // Nothing to emit if the IR builder has no valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered static loops are handled through the dispatch entry points, not
  // here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only consistent with the non-chunked schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    // A present chunk is only consistent with the chunked schedules.
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3442 
3443 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3444                                         SourceLocation Loc,
3445                                         OpenMPDirectiveKind DKind,
3446                                         const OpenMPScheduleTy &ScheduleKind,
3447                                         const StaticRTInput &Values) {
3448   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3449       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3450   assert(isOpenMPWorksharingDirective(DKind) &&
3451          "Expected loop-based or sections-based directive.");
3452   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3453                                              isOpenMPLoopDirective(DKind)
3454                                                  ? OMP_IDENT_WORK_LOOP
3455                                                  : OMP_IDENT_WORK_SECTIONS);
3456   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3457   llvm::Constant *StaticInitFunction =
3458       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3459   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3460                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3461 }
3462 
3463 void CGOpenMPRuntime::emitDistributeStaticInit(
3464     CodeGenFunction &CGF, SourceLocation Loc,
3465     OpenMPDistScheduleClauseKind SchedKind,
3466     const CGOpenMPRuntime::StaticRTInput &Values) {
3467   OpenMPSchedType ScheduleNum =
3468       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3469   llvm::Value *UpdatedLocation =
3470       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3471   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3472   llvm::Constant *StaticInitFunction =
3473       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3474   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3475                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3476                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3477 }
3478 
3479 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3480                                           SourceLocation Loc,
3481                                           OpenMPDirectiveKind DKind) {
3482   if (!CGF.HaveInsertPoint())
3483     return;
3484   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3485   llvm::Value *Args[] = {
3486       emitUpdateLocation(CGF, Loc,
3487                          isOpenMPDistributeDirective(DKind)
3488                              ? OMP_IDENT_WORK_DISTRIBUTE
3489                              : isOpenMPLoopDirective(DKind)
3490                                    ? OMP_IDENT_WORK_LOOP
3491                                    : OMP_IDENT_WORK_SECTIONS),
3492       getThreadID(CGF, Loc)};
3493   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3494                       Args);
3495 }
3496 
3497 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3498                                                  SourceLocation Loc,
3499                                                  unsigned IVSize,
3500                                                  bool IVSigned) {
3501   if (!CGF.HaveInsertPoint())
3502     return;
3503   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3504   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3505   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3506 }
3507 
3508 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3509                                           SourceLocation Loc, unsigned IVSize,
3510                                           bool IVSigned, Address IL,
3511                                           Address LB, Address UB,
3512                                           Address ST) {
3513   // Call __kmpc_dispatch_next(
3514   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3515   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3516   //          kmp_int[32|64] *p_stride);
3517   llvm::Value *Args[] = {
3518       emitUpdateLocation(CGF, Loc),
3519       getThreadID(CGF, Loc),
3520       IL.getPointer(), // &isLastIter
3521       LB.getPointer(), // &Lower
3522       UB.getPointer(), // &Upper
3523       ST.getPointer()  // &Stride
3524   };
3525   llvm::Value *Call =
3526       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3527   return CGF.EmitScalarConversion(
3528       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3529       CGF.getContext().BoolTy, Loc);
3530 }
3531 
3532 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3533                                            llvm::Value *NumThreads,
3534                                            SourceLocation Loc) {
3535   if (!CGF.HaveInsertPoint())
3536     return;
3537   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3538   llvm::Value *Args[] = {
3539       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3540       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3541   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3542                       Args);
3543 }
3544 
3545 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3546                                          OpenMPProcBindClauseKind ProcBind,
3547                                          SourceLocation Loc) {
3548   if (!CGF.HaveInsertPoint())
3549     return;
3550   // Constants for proc bind value accepted by the runtime.
3551   enum ProcBindTy {
3552     ProcBindFalse = 0,
3553     ProcBindTrue,
3554     ProcBindMaster,
3555     ProcBindClose,
3556     ProcBindSpread,
3557     ProcBindIntel,
3558     ProcBindDefault
3559   } RuntimeProcBind;
3560   switch (ProcBind) {
3561   case OMPC_PROC_BIND_master:
3562     RuntimeProcBind = ProcBindMaster;
3563     break;
3564   case OMPC_PROC_BIND_close:
3565     RuntimeProcBind = ProcBindClose;
3566     break;
3567   case OMPC_PROC_BIND_spread:
3568     RuntimeProcBind = ProcBindSpread;
3569     break;
3570   case OMPC_PROC_BIND_unknown:
3571     llvm_unreachable("Unsupported proc_bind value.");
3572   }
3573   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3574   llvm::Value *Args[] = {
3575       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3576       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3577   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3578 }
3579 
3580 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3581                                 SourceLocation Loc) {
3582   if (!CGF.HaveInsertPoint())
3583     return;
3584   // Build call void __kmpc_flush(ident_t *loc)
3585   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3586                       emitUpdateLocation(CGF, Loc));
3587 }
3588 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the field order here must match the kmp_task_t record layout
/// built elsewhere in this file's task codegen — keep the two in sync.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3614 
3615 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3616   return OffloadEntriesTargetRegion.empty() &&
3617          OffloadEntriesDeviceGlobalVar.empty();
3618 }
3619 
3620 /// Initialize target region entry.
3621 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3622     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3623                                     StringRef ParentName, unsigned LineNum,
3624                                     unsigned Order) {
3625   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3626                                              "only required for the device "
3627                                              "code generation.");
3628   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3629       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3630                                    OMPTargetRegionEntryTargetRegion);
3631   ++OffloadingEntriesNum;
3632 }
3633 
/// Register a target region entry: on the device, fill in the entry that was
/// initialized from host metadata; on the host, create a fresh ordered entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // The host metadata referenced a region the device codegen never
      // produced (or one that was already registered); diagnose instead of
      // silently dropping it.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: create the entry on the fly, ordered by the running
    // entries counter.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3661 
3662 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3663     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3664     unsigned LineNum) const {
3665   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3666   if (PerDevice == OffloadEntriesTargetRegion.end())
3667     return false;
3668   auto PerFile = PerDevice->second.find(FileID);
3669   if (PerFile == PerDevice->second.end())
3670     return false;
3671   auto PerParentName = PerFile->second.find(ParentName);
3672   if (PerParentName == PerFile->second.end())
3673     return false;
3674   auto PerLine = PerParentName->second.find(LineNum);
3675   if (PerLine == PerParentName->second.end())
3676     return false;
3677   // Fail if this entry is already registered.
3678   if (PerLine->second.getAddress() || PerLine->second.getID())
3679     return false;
3680   return true;
3681 }
3682 
3683 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3684     const OffloadTargetRegionEntryInfoActTy &Action) {
3685   // Scan all target region entries and perform the provided action.
3686   for (const auto &D : OffloadEntriesTargetRegion)
3687     for (const auto &F : D.second)
3688       for (const auto &P : F.second)
3689         for (const auto &L : P.second)
3690           Action(D.first, F.first, P.first(), L.first, L.second);
3691 }
3692 
/// Initialize a device global variable entry. Creates a placeholder that is
/// completed later by registerDeviceGlobalVarEntryInfo (address, size and
/// linkage are not known yet at this point).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3703 
/// Register a device global variable entry: on the device, complete the
/// pre-initialized entry; on the host, create a fresh ordered entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Already fully registered with this address - nothing more to do.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
      return;
    Entry.setAddress(Addr);
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
  } else {
    // Host side: ignore duplicate registrations, otherwise append a new entry
    // ordered by the running entries counter.
    if (hasDeviceGlobalVarEntryInfo(VarName))
      return;
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3728 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
3736 
/// Create (host side only) the offloading binary descriptor plus the
/// registration/unregistration functions that pass it to the offload runtime
/// at program startup/shutdown.
/// \returns the registration function (to be run as a global ctor), or null
/// if nothing needs to be registered.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  // One image record per offload target; the image start/end symbols are
  // external weak since the device binary is linked in separately.
  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Unregistration function: calls __tgt_unregister_lib(Desc). Built first so
  // the registration function below can install it as a destructor.
  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  // Registration function: calls __tgt_register_lib(Desc) and schedules the
  // unregistration function to run at program exit.
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort the triples so the comdat key is independent of -fopenmp-targets
    // ordering on the command line.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
3887 
/// Create a __tgt_offload_entry global for the given entry (target region or
/// declare-target variable) and place it in the section the offload runtime's
/// linker script collects between entries_begin/entries_end.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields of __tgt_offload_entry: addr, name, size, flags, reserved (0).
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  std::string Section = getName({"omp_offloading", "entries"});
  Entry->setSection(Section);
}
3918 
3919 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3920   // Emit the offloading entries and metadata so that the device codegen side
3921   // can easily figure out what to emit. The produced metadata looks like
3922   // this:
3923   //
3924   // !omp_offload.info = !{!1, ...}
3925   //
3926   // Right now we only generate metadata for function that contain target
3927   // regions.
3928 
3929   // If we do not have entries, we don't need to do anything.
3930   if (OffloadEntriesInfoManager.empty())
3931     return;
3932 
3933   llvm::Module &M = CGM.getModule();
3934   llvm::LLVMContext &C = M.getContext();
3935   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
3936       OrderedEntries(OffloadEntriesInfoManager.size());
3937   llvm::SmallVector<StringRef, 16> ParentFunctions(
3938       OffloadEntriesInfoManager.size());
3939 
3940   // Auxiliary methods to create metadata values and strings.
3941   auto &&GetMDInt = [this](unsigned V) {
3942     return llvm::ConstantAsMetadata::get(
3943         llvm::ConstantInt::get(CGM.Int32Ty, V));
3944   };
3945 
3946   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3947 
3948   // Create the offloading info metadata node.
3949   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3950 
3951   // Create function that emits metadata for each target region entry;
3952   auto &&TargetRegionMetadataEmitter =
3953       [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
3954           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3955           unsigned Line,
3956           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3957         // Generate metadata for target regions. Each entry of this metadata
3958         // contains:
3959         // - Entry 0 -> Kind of this type of metadata (0).
3960         // - Entry 1 -> Device ID of the file where the entry was identified.
3961         // - Entry 2 -> File ID of the file where the entry was identified.
3962         // - Entry 3 -> Mangled name of the function where the entry was
3963         // identified.
3964         // - Entry 4 -> Line in the file where the entry was identified.
3965         // - Entry 5 -> Order the entry was created.
3966         // The first element of the metadata node is the kind.
3967         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3968                                  GetMDInt(FileID),      GetMDString(ParentName),
3969                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3970 
3971         // Save this entry in the right position of the ordered entries array.
3972         OrderedEntries[E.getOrder()] = &E;
3973         ParentFunctions[E.getOrder()] = ParentName;
3974 
3975         // Add metadata to the named metadata node.
3976         MD->addOperand(llvm::MDNode::get(C, Ops));
3977       };
3978 
3979   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3980       TargetRegionMetadataEmitter);
3981 
3982   // Create function that emits metadata for each device global variable entry;
3983   auto &&DeviceGlobalVarMetadataEmitter =
3984       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3985        MD](StringRef MangledName,
3986            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3987                &E) {
3988         // Generate metadata for global variables. Each entry of this metadata
3989         // contains:
3990         // - Entry 0 -> Kind of this type of metadata (1).
3991         // - Entry 1 -> Mangled name of the variable.
3992         // - Entry 2 -> Declare target kind.
3993         // - Entry 3 -> Order the entry was created.
3994         // The first element of the metadata node is the kind.
3995         llvm::Metadata *Ops[] = {
3996             GetMDInt(E.getKind()), GetMDString(MangledName),
3997             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3998 
3999         // Save this entry in the right position of the ordered entries array.
4000         OrderedEntries[E.getOrder()] = &E;
4001 
4002         // Add metadata to the named metadata node.
4003         MD->addOperand(llvm::MDNode::get(C, Ops));
4004       };
4005 
4006   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4007       DeviceGlobalVarMetadataEmitter);
4008 
4009   for (const auto *E : OrderedEntries) {
4010     assert(E && "All ordered entries must exist!");
4011     if (const auto *CE =
4012             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4013                 E)) {
4014       if (!CE->getID() || !CE->getAddress()) {
4015         // Do not blame the entry if the parent funtion is not emitted.
4016         StringRef FnName = ParentFunctions[CE->getOrder()];
4017         if (!CGM.GetGlobalValue(FnName))
4018           continue;
4019         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4020             DiagnosticsEngine::Error,
4021             "Offloading entry for target region is incorrect: either the "
4022             "address or the ID is invalid.");
4023         CGM.getDiags().Report(DiagID);
4024         continue;
4025       }
4026       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4027                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4028     } else if (const auto *CE =
4029                    dyn_cast<OffloadEntriesInfoManagerTy::
4030                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4031       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4032           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4033               CE->getFlags());
4034       switch (Flags) {
4035       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4036         if (!CE->getAddress()) {
4037           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4038               DiagnosticsEngine::Error,
4039               "Offloading entry for declare target variable is incorrect: the "
4040               "address is invalid.");
4041           CGM.getDiags().Report(DiagID);
4042           continue;
4043         }
4044         // The vaiable has no definition - no need to add the entry.
4045         if (CE->getVarSize().isZero())
4046           continue;
4047         break;
4048       }
4049       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4050         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4051                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4052                "Declaret target link address is set.");
4053         if (CGM.getLangOpts().OpenMPIsDevice)
4054           continue;
4055         if (!CE->getAddress()) {
4056           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4057               DiagnosticsEngine::Error,
4058               "Offloading entry for declare target variable is incorrect: the "
4059               "address is invalid.");
4060           CGM.getDiags().Report(DiagID);
4061           continue;
4062         }
4063         break;
4064       }
4065       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4066                          CE->getVarSize().getQuantity(), Flags,
4067                          CE->getLinkage());
4068     } else {
4069       llvm_unreachable("Unsupported entry kind.");
4070     }
4071   }
4072 }
4073 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  // Only the device-side compilation needs the host entry table.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to load if no host IR file was provided.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a temporary module in a local context; only the
  // named metadata node is read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers that read operand Idx of the current node as an integer or a
    // string, mirroring the GetMDInt/GetMDString emitters used when the
    // metadata was created.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the layout of the remaining operands
    // depends on it (see the emitters in
    // createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4142 
4143 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4144   if (!KmpRoutineEntryPtrTy) {
4145     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4146     ASTContext &C = CGM.getContext();
4147     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4148     FunctionProtoType::ExtProtoInfo EPI;
4149     KmpRoutineEntryPtrQTy = C.getPointerType(
4150         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4151     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4152   }
4153 }
4154 
4155 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4156   // Make sure the type of the entry is already created. This is the type we
4157   // have to create:
4158   // struct __tgt_offload_entry{
4159   //   void      *addr;       // Pointer to the offload entry info.
4160   //                          // (function or global)
4161   //   char      *name;       // Name of the function or global.
4162   //   size_t     size;       // Size of the entry info (0 if it a function).
4163   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4164   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4165   // };
4166   if (TgtOffloadEntryQTy.isNull()) {
4167     ASTContext &C = CGM.getContext();
4168     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4169     RD->startDefinition();
4170     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4171     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4172     addFieldToRecordDecl(C, RD, C.getSizeType());
4173     addFieldToRecordDecl(
4174         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4175     addFieldToRecordDecl(
4176         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4177     RD->completeDefinition();
4178     RD->addAttr(PackedAttr::CreateImplicit(C));
4179     TgtOffloadEntryQTy = C.getRecordType(RD);
4180   }
4181   return TgtOffloadEntryQTy;
4182 }
4183 
4184 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4185   // These are the types we need to build:
4186   // struct __tgt_device_image{
4187   // void   *ImageStart;       // Pointer to the target code start.
4188   // void   *ImageEnd;         // Pointer to the target code end.
4189   // // We also add the host entries to the device image, as it may be useful
4190   // // for the target runtime to have access to that information.
4191   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4192   //                                       // the entries.
4193   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4194   //                                       // entries (non inclusive).
4195   // };
4196   if (TgtDeviceImageQTy.isNull()) {
4197     ASTContext &C = CGM.getContext();
4198     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4199     RD->startDefinition();
4200     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4201     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4202     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4203     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4204     RD->completeDefinition();
4205     TgtDeviceImageQTy = C.getRecordType(RD);
4206   }
4207   return TgtDeviceImageQTy;
4208 }
4209 
4210 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4211   // struct __tgt_bin_desc{
4212   //   int32_t              NumDevices;      // Number of devices supported.
4213   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4214   //                                         // (one per device).
4215   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4216   //                                         // entries.
4217   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4218   //                                         // entries (non inclusive).
4219   // };
4220   if (TgtBinaryDescriptorQTy.isNull()) {
4221     ASTContext &C = CGM.getContext();
4222     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4223     RD->startDefinition();
4224     addFieldToRecordDecl(
4225         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4226     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4227     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4228     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4229     RD->completeDefinition();
4230     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4231   }
4232   return TgtBinaryDescriptorQTy;
4233 }
4234 
4235 namespace {
4236 struct PrivateHelpersTy {
4237   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4238                    const VarDecl *PrivateElemInit)
4239       : Original(Original), PrivateCopy(PrivateCopy),
4240         PrivateElemInit(PrivateElemInit) {}
4241   const VarDecl *Original;
4242   const VarDecl *PrivateCopy;
4243   const VarDecl *PrivateElemInit;
4244 };
4245 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4246 } // anonymous namespace
4247 
4248 static RecordDecl *
4249 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4250   if (!Privates.empty()) {
4251     ASTContext &C = CGM.getContext();
4252     // Build struct .kmp_privates_t. {
4253     //         /*  private vars  */
4254     //       };
4255     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4256     RD->startDefinition();
4257     for (const auto &Pair : Privates) {
4258       const VarDecl *VD = Pair.second.Original;
4259       QualType Type = VD->getType().getNonReferenceType();
4260       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4261       if (VD->hasAttrs()) {
4262         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4263              E(VD->getAttrs().end());
4264              I != E; ++I)
4265           FD->addAttr(*I);
4266       }
4267     }
4268     RD->completeDefinition();
4269     return RD;
4270   }
4271   return nullptr;
4272 }
4273 
4274 static RecordDecl *
4275 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4276                          QualType KmpInt32Ty,
4277                          QualType KmpRoutineEntryPointerQTy) {
4278   ASTContext &C = CGM.getContext();
4279   // Build struct kmp_task_t {
4280   //         void *              shareds;
4281   //         kmp_routine_entry_t routine;
4282   //         kmp_int32           part_id;
4283   //         kmp_cmplrdata_t data1;
4284   //         kmp_cmplrdata_t data2;
4285   // For taskloops additional fields:
4286   //         kmp_uint64          lb;
4287   //         kmp_uint64          ub;
4288   //         kmp_int64           st;
4289   //         kmp_int32           liter;
4290   //         void *              reductions;
4291   //       };
4292   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4293   UD->startDefinition();
4294   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4295   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4296   UD->completeDefinition();
4297   QualType KmpCmplrdataTy = C.getRecordType(UD);
4298   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4299   RD->startDefinition();
4300   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4301   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4302   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4303   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4304   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4305   if (isOpenMPTaskLoopDirective(Kind)) {
4306     QualType KmpUInt64Ty =
4307         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4308     QualType KmpInt64Ty =
4309         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4310     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4311     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4312     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4313     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4314     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4315   }
4316   RD->completeDefinition();
4317   return RD;
4318 }
4319 
4320 static RecordDecl *
4321 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4322                                      ArrayRef<PrivateDataTy> Privates) {
4323   ASTContext &C = CGM.getContext();
4324   // Build struct kmp_task_t_with_privates {
4325   //         kmp_task_t task_data;
4326   //         .kmp_privates_t. privates;
4327   //       };
4328   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4329   RD->startDefinition();
4330   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4331   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4332     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4333   RD->completeDefinition();
4334   return RD;
4335 }
4336 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the fixed proxy signature: kmp_int32 (kmp_int32, kmp_task_t *).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t - the first field of
  // kmp_task_t_with_privates.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (its lvalue pointer, not a loaded value).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // The shareds pointer is stored type-erased; cast it back to SharedsPtrTy.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates record when present (field #1 of the
  // wrapper record); otherwise pass a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass the loop bounds, stride, last-iteration
    // flag and the reductions pointer, all loaded from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4451 
/// Emit a function with the same signature as the task entry that destroys
/// every field of the task's privates record that needs destruction:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // run the destructor of each tt->privates field that has one
/// }
/// \endcode
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same signature as the task entry: kmp_int32 (kmp_int32, kmp_task_t *).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates record: the second field of
  // kmp_task_t_with_privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every field with a non-trivial destruction
  // kind; the cleanups run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4500 
4501 /// Emit a privates mapping function for correct handling of private and
4502 /// firstprivate variables.
4503 /// \code
4504 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4505 /// **noalias priv1,...,  <tyn> **noalias privn) {
4506 ///   *priv1 = &.privates.priv1;
4507 ///   ...;
4508 ///   *privn = &.privates.privn;
4509 /// }
4510 /// \endcode
4511 static llvm::Value *
4512 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4513                                ArrayRef<const Expr *> PrivateVars,
4514                                ArrayRef<const Expr *> FirstprivateVars,
4515                                ArrayRef<const Expr *> LastprivateVars,
4516                                QualType PrivatesQTy,
4517                                ArrayRef<PrivateDataTy> Privates) {
4518   ASTContext &C = CGM.getContext();
4519   FunctionArgList Args;
4520   ImplicitParamDecl TaskPrivatesArg(
4521       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4522       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4523       ImplicitParamDecl::Other);
4524   Args.push_back(&TaskPrivatesArg);
4525   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4526   unsigned Counter = 1;
4527   for (const Expr *E : PrivateVars) {
4528     Args.push_back(ImplicitParamDecl::Create(
4529         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4530         C.getPointerType(C.getPointerType(E->getType()))
4531             .withConst()
4532             .withRestrict(),
4533         ImplicitParamDecl::Other));
4534     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4535     PrivateVarsPos[VD] = Counter;
4536     ++Counter;
4537   }
4538   for (const Expr *E : FirstprivateVars) {
4539     Args.push_back(ImplicitParamDecl::Create(
4540         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4541         C.getPointerType(C.getPointerType(E->getType()))
4542             .withConst()
4543             .withRestrict(),
4544         ImplicitParamDecl::Other));
4545     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4546     PrivateVarsPos[VD] = Counter;
4547     ++Counter;
4548   }
4549   for (const Expr *E : LastprivateVars) {
4550     Args.push_back(ImplicitParamDecl::Create(
4551         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4552         C.getPointerType(C.getPointerType(E->getType()))
4553             .withConst()
4554             .withRestrict(),
4555         ImplicitParamDecl::Other));
4556     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4557     PrivateVarsPos[VD] = Counter;
4558     ++Counter;
4559   }
4560   const auto &TaskPrivatesMapFnInfo =
4561       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4562   llvm::FunctionType *TaskPrivatesMapTy =
4563       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4564   std::string Name =
4565       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4566   auto *TaskPrivatesMap = llvm::Function::Create(
4567       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4568       &CGM.getModule());
4569   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4570                                     TaskPrivatesMapFnInfo);
4571   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4572   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4573   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4574   CodeGenFunction CGF(CGM);
4575   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4576                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4577 
4578   // *privi = &.privates.privi;
4579   LValue Base = CGF.EmitLoadOfPointerLValue(
4580       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4581       TaskPrivatesArg.getType()->castAs<PointerType>());
4582   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4583   Counter = 0;
4584   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4585     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4586     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4587     LValue RefLVal =
4588         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4589     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4590         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4591     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4592     ++Counter;
4593   }
4594   CGF.FinishFunction();
4595   return TaskPrivatesMap;
4596 }
4597 
4598 static bool stable_sort_comparator(const PrivateDataTy P1,
4599                                    const PrivateDataTy P2) {
4600   return P1.first > P2.first;
4601 }
4602 
4603 /// Emit initialization for private variables in task-based directives.
4604 static void emitPrivatesInit(CodeGenFunction &CGF,
4605                              const OMPExecutableDirective &D,
4606                              Address KmpTaskSharedsPtr, LValue TDBase,
4607                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4608                              QualType SharedsTy, QualType SharedsPtrTy,
4609                              const OMPTaskDataTy &Data,
4610                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4611   ASTContext &C = CGF.getContext();
4612   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4613   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4614   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4615                                  ? OMPD_taskloop
4616                                  : OMPD_task;
4617   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4618   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4619   LValue SrcBase;
4620   bool IsTargetTask =
4621       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4622       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4623   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4624   // PointersArray and SizesArray. The original variables for these arrays are
4625   // not captured and we get their addresses explicitly.
4626   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4627       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4628     SrcBase = CGF.MakeAddrLValue(
4629         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4630             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4631         SharedsTy);
4632   }
4633   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4634   for (const PrivateDataTy &Pair : Privates) {
4635     const VarDecl *VD = Pair.second.PrivateCopy;
4636     const Expr *Init = VD->getAnyInitializer();
4637     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4638                              !CGF.isTrivialInitializer(Init)))) {
4639       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4640       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4641         const VarDecl *OriginalVD = Pair.second.Original;
4642         // Check if the variable is the target-based BasePointersArray,
4643         // PointersArray or SizesArray.
4644         LValue SharedRefLValue;
4645         QualType Type = OriginalVD->getType();
4646         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4647         if (IsTargetTask && !SharedField) {
4648           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4649                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4650                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4651                          ->getNumParams() == 0 &&
4652                  isa<TranslationUnitDecl>(
4653                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4654                          ->getDeclContext()) &&
4655                  "Expected artificial target data variable.");
4656           SharedRefLValue =
4657               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4658         } else {
4659           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4660           SharedRefLValue = CGF.MakeAddrLValue(
4661               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4662               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4663               SharedRefLValue.getTBAAInfo());
4664         }
4665         if (Type->isArrayType()) {
4666           // Initialize firstprivate array.
4667           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4668             // Perform simple memcpy.
4669             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4670           } else {
4671             // Initialize firstprivate array using element-by-element
4672             // initialization.
4673             CGF.EmitOMPAggregateAssign(
4674                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4675                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4676                                                   Address SrcElement) {
4677                   // Clean up any temporaries needed by the initialization.
4678                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4679                   InitScope.addPrivate(
4680                       Elem, [SrcElement]() -> Address { return SrcElement; });
4681                   (void)InitScope.Privatize();
4682                   // Emit initialization for single element.
4683                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4684                       CGF, &CapturesInfo);
4685                   CGF.EmitAnyExprToMem(Init, DestElement,
4686                                        Init->getType().getQualifiers(),
4687                                        /*IsInitializer=*/false);
4688                 });
4689           }
4690         } else {
4691           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4692           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4693             return SharedRefLValue.getAddress();
4694           });
4695           (void)InitScope.Privatize();
4696           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4697           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4698                              /*capturedByInit=*/false);
4699         }
4700       } else {
4701         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4702       }
4703     }
4704     ++FI;
4705   }
4706 }
4707 
4708 /// Check if duplication function is required for taskloops.
4709 static bool checkInitIsRequired(CodeGenFunction &CGF,
4710                                 ArrayRef<PrivateDataTy> Privates) {
4711   bool InitRequired = false;
4712   for (const PrivateDataTy &Pair : Privates) {
4713     const VarDecl *VD = Pair.second.PrivateCopy;
4714     const Expr *Init = VD->getAnyInitializer();
4715     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4716                                     !CGF.isTrivialInitializer(Init));
4717     if (InitRequired)
4718       break;
4719   }
4720   return InitRequired;
4721 }
4722 
4723 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter If true, also store the incoming 'lastpriv' argument
/// into the destination task's last-iteration field.
/// \return The newly created internal-linkage task_dup function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the signature: void omp_task_dup(kmp_task_t *dst, kmp_task_t *src,
  // int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Load the destination task pointer; privates are initialized into it.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // If there are firstprivates, read the shareds pointer out of the *source*
  // task so emitPrivatesInit can copy/construct from the original values.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4802 
4803 /// Checks if destructor function is required to be generated.
4804 /// \return true if cleanups are required, false otherwise.
4805 static bool
4806 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4807   bool NeedsCleanup = false;
4808   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4809   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4810   for (const FieldDecl *FD : PrivateRD->fields()) {
4811     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4812     if (NeedsCleanup)
4813       break;
4814   }
4815   return NeedsCleanup;
4816 }
4817 
// Common task-creation codegen shared by 'task' / 'taskloop' / target task
// directives: collects and sorts privates, builds the kmp_task_t_with_privates
// record, emits proxy entry / privates-map / destructor / dup helper
// functions, calls __kmpc_omp_task_alloc, and fills in the allocated task
// object (shareds copy, private initializers, destructors pointer, priority).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Private, firstprivate and lastprivate copies are gathered into a single
  // list; only firstprivates carry a PrivateElemInit for element-wise init.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable sort keeps the relative declaration order of equally-aligned vars.
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a wider
  // record (with bounds/stride fields), cached separately from plain tasks.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates-map function returns the addresses of the private copies;
  // it is passed to the proxy task entry as its 4th argument.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Value *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The 'final' clause may be a runtime expression (pointer set) or a known
  // constant (int part of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup function when lastprivates are
    // present or any private copy requires non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5018 
// Emit code for an 'omp task' construct: set up the task via emitTaskInit,
// build the dependence array (if any), then either enqueue the task with
// __kmpc_omp_task[_with_deps] or, on a false 'if' clause, execute it
// immediately between __kmpc_omp_task_begin_if0/complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Lazily build and cache the kmp_depend_info record type:
    // { intptr_t base_addr; size_t len; flags_t flags; }.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      // For array sections the length is computed as the byte distance
      // between the section's lower bound and one-past its upper bound.
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointer at its first element for the RTL.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): enqueue the task into the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks, reset part_id to 0 before enqueueing.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause false: execute the task body immediately (undeferred) on the
  // encountering thread.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5212 
// Emit code for an 'omp taskloop' construct: set up the task via
// emitTaskInit, initialize the lower/upper bound, stride and reductions
// fields of the allocated kmp_task_t, then call __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause is passed as a runtime int argument (1 when absent).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound, upper bound and stride fields from the
  // loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: zero out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling kind encoding expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5293 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Generates a pointer-chasing while-do loop over the array elements,
/// remapping \p LHSVar / \p RHSVar to the current element on each iteration
/// before invoking \p RedOpGen.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen on every iteration (used by the atomic-reduction path).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for an empty (zero-length) section.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers across
  // loop iterations; back-edges are added after the latch below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated combiner operates on a single element.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5373 
5374 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5375 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5376 /// UDR combiner function.
5377 static void emitReductionCombiner(CodeGenFunction &CGF,
5378                                   const Expr *ReductionOp) {
5379   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5380     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5381       if (const auto *DRE =
5382               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5383         if (const auto *DRD =
5384                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5385           std::pair<llvm::Function *, llvm::Function *> Reduction =
5386               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5387           RValue Func = RValue::get(Reduction.first);
5388           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5389           CGF.EmitIgnoredExpr(ReductionOp);
5390           return;
5391         }
5392   CGF.EmitIgnoredExpr(ReductionOp);
5393 }
5394 
/// Emits the outlined combiner used by __kmpc_reduce:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments are really pointers to void*[n] arrays holding the
/// addresses of the reduction items; for each item the function emits
/// lhs[i] = RedOp<i>(lhs[i], rhs[i]).
llvm::Value *CGOpenMPRuntime::emitReductionFunction(
    CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the LHS/RHS helper variables to the corresponding slots of the
  // argument arrays so the combiner expressions below read/write through
  // them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // A VLA reduction item stores its dynamic size in the next array slot;
      // skip over it and bind it to the VLA's opaque size expression.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  // Emit the combiner for every reduction item.
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5487 
5488 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5489                                                   const Expr *ReductionOp,
5490                                                   const Expr *PrivateRef,
5491                                                   const DeclRefExpr *LHS,
5492                                                   const DeclRefExpr *RHS) {
5493   if (PrivateRef->getType()->isArrayType()) {
5494     // Emit reduction for array section.
5495     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5496     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5497     EmitOMPAggregateReduction(
5498         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5499         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5500           emitReductionCombiner(CGF, ReductionOp);
5501         });
5502   } else {
5503     // Emit reduction for array subscript or single variable.
5504     emitReductionCombiner(CGF, ReductionOp);
5505   }
5506 }
5507 
/// Emits code for the OpenMP 'reduction' clause: builds the RedList of item
/// addresses, the outlined reduce_func and a static lock, then calls
/// __kmpc_reduce{_nowait} and switches on the result (case 1: direct
/// reduction, case 2: atomic/critical per-item reduction). With
/// Options.SimpleReduction set, only the plain combiners are emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just emit each combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // For a VLA item the dynamic element count occupies the next slot,
      // smuggled through the void* array as an int-to-pointer cast.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Value *ReductionFn = emitReductionFunction(
      CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Emit all combiners, then (via CommonActionTy below) the end_reduce call.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // If the reduction op is an assignment, its LHS is the reduction
      // variable (X) and its RHS the update expression.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emit 'X = X BO E' as an atomic update; the fallback lambda is
        // used when no simple atomic form exists, remapping VD to a
        // temporary holding the loaded value of X.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5813 
5814 /// Generates unique name for artificial threadprivate variables.
5815 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5816 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5817                                       const Expr *Ref) {
5818   SmallString<256> Buffer;
5819   llvm::raw_svector_ostream Out(Buffer);
5820   const clang::DeclRefExpr *DE;
5821   const VarDecl *D = ::getBaseDecl(Ref, DE);
5822   if (!D)
5823     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5824   D = D->getCanonicalDecl();
5825   std::string Name = CGM.getOpenMPRuntime().getName(
5826       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5827   Out << Prefix << Name << "_"
5828       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5829   return Out.str();
5830 }
5831 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // void .red_init(void *arg) -- arg points to the private copy to init.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No declare-reduction initializer: pass a null shared lvalue.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5898 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp Combiner expression using \p LHS and \p RHS.
/// \param LHS, RHS Helper DeclRefExprs remapped to the function arguments.
/// \param PrivateRef Reference to the private copy (selects the
/// scalar vs. array-section combining path).
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // void .red_comb(void *arg0 /*in-out*/, void *arg1 /*in*/);
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5976 
5977 /// Emits reduction finalizer function:
5978 /// \code
5979 /// void @.red_fini(void* %arg) {
5980 /// %0 = bitcast void* %arg to <type>*
5981 /// <destroy>(<type>* %0)
5982 /// ret void
5983 /// }
5984 /// \endcode
5985 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5986                                            SourceLocation Loc,
5987                                            ReductionCodeGen &RCG, unsigned N) {
5988   if (!RCG.needCleanups(N))
5989     return nullptr;
5990   ASTContext &C = CGM.getContext();
5991   FunctionArgList Args;
5992   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5993                           ImplicitParamDecl::Other);
5994   Args.emplace_back(&Param);
5995   const auto &FnInfo =
5996       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5997   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5998   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5999   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6000                                     Name, &CGM.getModule());
6001   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6002   Fn->setDoesNotRecurse();
6003   CodeGenFunction CGF(CGM);
6004   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6005   Address PrivateAddr = CGF.EmitLoadOfPointer(
6006       CGF.GetAddrOfLocalVar(&Param),
6007       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6008   llvm::Value *Size = nullptr;
6009   // If the size of the reduction item is non-constant, load it from global
6010   // threadprivate variable.
6011   if (RCG.getSizes(N).second) {
6012     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6013         CGF, CGM.getContext().getSizeType(),
6014         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6015     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6016                                 CGM.getContext().getSizeType(), Loc);
6017   }
6018   RCG.emitAggregateType(CGF, N, Size);
6019   // Emit the finalizer body:
6020   // <destroy>(<type>* %0)
6021   RCG.emitCleanups(CGF, N, PrivateAddr);
6022   CGF.FinishFunction();
6023   return Fn;
6024 }
6025 
6026 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6027     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6028     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6029   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6030     return nullptr;
6031 
6032   // Build typedef struct:
6033   // kmp_task_red_input {
6034   //   void *reduce_shar; // shared reduction item
6035   //   size_t reduce_size; // size of data item
6036   //   void *reduce_init; // data initialization routine
6037   //   void *reduce_fini; // data finalization routine
6038   //   void *reduce_comb; // data combiner routine
6039   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6040   // } kmp_task_red_input_t;
6041   ASTContext &C = CGM.getContext();
6042   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6043   RD->startDefinition();
6044   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6045   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6046   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6047   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6048   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6049   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6050       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6051   RD->completeDefinition();
6052   QualType RDType = C.getRecordType(RD);
6053   unsigned Size = Data.ReductionVars.size();
6054   llvm::APInt ArraySize(/*numBits=*/64, Size);
6055   QualType ArrayRDType = C.getConstantArrayType(
6056       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6057   // kmp_task_red_input_t .rd_input.[Size];
6058   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6059   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6060                        Data.ReductionOps);
6061   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6062     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6063     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6064                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6065     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6066         TaskRedInput.getPointer(), Idxs,
6067         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6068         ".rd_input.gep.");
6069     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6070     // ElemLVal.reduce_shar = &Shareds[Cnt];
6071     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6072     RCG.emitSharedLValue(CGF, Cnt);
6073     llvm::Value *CastedShared =
6074         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6075     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6076     RCG.emitAggregateType(CGF, Cnt);
6077     llvm::Value *SizeValInChars;
6078     llvm::Value *SizeVal;
6079     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6080     // We use delayed creation/initialization for VLAs, array sections and
6081     // custom reduction initializations. It is required because runtime does not
6082     // provide the way to pass the sizes of VLAs/array sections to
6083     // initializer/combiner/finalizer functions and does not pass the pointer to
6084     // original reduction item to the initializer. Instead threadprivate global
6085     // variables are used to store these values and use them in the functions.
6086     bool DelayedCreation = !!SizeVal;
6087     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6088                                                /*isSigned=*/false);
6089     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6090     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6091     // ElemLVal.reduce_init = init;
6092     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6093     llvm::Value *InitAddr =
6094         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6095     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6096     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6097     // ElemLVal.reduce_fini = fini;
6098     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6099     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6100     llvm::Value *FiniAddr = Fini
6101                                 ? CGF.EmitCastToVoidPtr(Fini)
6102                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6103     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6104     // ElemLVal.reduce_comb = comb;
6105     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6106     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6107         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6108         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6109     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6110     // ElemLVal.flags = 0;
6111     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6112     if (DelayedCreation) {
6113       CGF.EmitStoreOfScalar(
6114           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
6115           FlagsLVal);
6116     } else
6117       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6118   }
6119   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6120   // *data);
6121   llvm::Value *Args[] = {
6122       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6123                                 /*isSigned=*/true),
6124       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6125       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6126                                                       CGM.VoidPtrTy)};
6127   return CGF.EmitRuntimeCall(
6128       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6129 }
6130 
6131 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6132                                               SourceLocation Loc,
6133                                               ReductionCodeGen &RCG,
6134                                               unsigned N) {
6135   auto Sizes = RCG.getSizes(N);
6136   // Emit threadprivate global variable if the type is non-constant
6137   // (Sizes.second = nullptr).
6138   if (Sizes.second) {
6139     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6140                                                      /*isSigned=*/false);
6141     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6142         CGF, CGM.getContext().getSizeType(),
6143         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6144     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6145   }
6146   // Store address of the original reduction item if custom initializer is used.
6147   if (RCG.usesReductionInitializer(N)) {
6148     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6149         CGF, CGM.getContext().VoidPtrTy,
6150         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6151     CGF.Builder.CreateStore(
6152         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6153             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6154         SharedAddr, /*IsVolatile=*/false);
6155   }
6156 }
6157 
6158 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6159                                               SourceLocation Loc,
6160                                               llvm::Value *ReductionsPtr,
6161                                               LValue SharedLVal) {
6162   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6163   // *d);
6164   llvm::Value *Args[] = {
6165       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6166                                 /*isSigned=*/true),
6167       ReductionsPtr,
6168       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6169                                                       CGM.VoidPtrTy)};
6170   return Address(
6171       CGF.EmitRuntimeCall(
6172           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6173       SharedLVal.getAlignment());
6174 }
6175 
6176 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6177                                        SourceLocation Loc) {
6178   if (!CGF.HaveInsertPoint())
6179     return;
6180   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6181   // global_tid);
6182   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6183   // Ignore return result until untied tasks are supported.
6184   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6185   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6186     Region->emitUntiedSwitch(CGF);
6187 }
6188 
6189 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6190                                            OpenMPDirectiveKind InnerKind,
6191                                            const RegionCodeGenTy &CodeGen,
6192                                            bool HasCancel) {
6193   if (!CGF.HaveInsertPoint())
6194     return;
6195   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6196   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6197 }
6198 
namespace {
/// Cancellation kind values passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
/// NOTE(review): the numeric values presumably mirror the libomp runtime's
/// cancellation kinds - confirm against the runtime's kmp.h.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation kind requested
  CancelParallel = 1,  // cancel a 'parallel' region
  CancelLoop = 2,      // cancel a worksharing loop ('for')
  CancelSections = 3,  // cancel a 'sections' region
  CancelTaskgroup = 4  // cancel a 'taskgroup' region
};
} // anonymous namespace
6208 
6209 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6210   RTCancelKind CancelKind = CancelNoreq;
6211   if (CancelRegion == OMPD_parallel)
6212     CancelKind = CancelParallel;
6213   else if (CancelRegion == OMPD_for)
6214     CancelKind = CancelLoop;
6215   else if (CancelRegion == OMPD_sections)
6216     CancelKind = CancelSections;
6217   else {
6218     assert(CancelRegion == OMPD_taskgroup);
6219     CancelKind = CancelTaskgroup;
6220   }
6221   return CancelKind;
6222 }
6223 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  // Nothing to emit once the insertion point has been cleared.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // Branch on the runtime's answer:
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation has been activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch through any pending cleanups to the
      // cancellation destination of the enclosing directive.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6258 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  // Nothing to emit once the insertion point has been cleared.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Generator for the actual cancel call; also serves as the 'then' branch
    // when an if-clause guards the cancellation.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // Branch on the runtime's answer:
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation has been activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch through any pending cleanups to the
      // cancellation destination of the enclosing directive.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an if-clause, only the 'then' branch performs the cancellation;
      // the 'else' branch is a no-op.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      // Unconditional cancel: emit the 'then' code directly.
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6300 
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // The parent function name becomes part of the unique target region name
  // built by the helper, so it must be non-empty here.
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Delegate the actual outlining and (optional) offload-entry registration.
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6309 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region body into a function with the name
  // computed above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device: the function address itself is the region ID.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host: a uniquely named constant byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6376 
6377 /// discard all CompoundStmts intervening between two constructs
6378 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
6379   while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
6380     Body = CS->body_front();
6381 
6382   return Body;
6383 }
6384 
6385 /// Emit the number of teams for a target directive.  Inspect the num_teams
6386 /// clause associated with a teams construct combined or closely nested
6387 /// with the target directive.
6388 ///
6389 /// Emit a team of size one for directives such as 'target parallel' that
6390 /// have no associated teams construct.
6391 ///
6392 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                               CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  // If the target directive is combined with a teams directive:
  //   Return the value in the num_teams clause, if any.
  //   Otherwise, return 0 to denote the runtime default.
  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
    if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
      // Evaluate the clause expression inside its own cleanup scope.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
                                                 /*IgnoreResultAssign*/ true);
      // The value is passed to the runtime as a signed 32-bit integer.
      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }

    // The default value is 0.
    return Bld.getInt32(0);
  }

  // If the target directive is combined with a parallel directive but not a
  // teams directive, start one team.
  if (isOpenMPParallelDirective(D.getDirectiveKind()))
    return Bld.getInt32(1);

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in a inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
        // The clause expression was captured by the target region; emit it
        // through the inner-expression capture info so captured variables
        // resolve correctly.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                                 /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no num_teams clause we use
      // the default value 0.
      return Bld.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}
6452 
6453 /// Emit the number of threads for a target directive.  Inspect the
6454 /// thread_limit clause associated with a teams construct combined or closely
6455 /// nested with the target directive.
6456 ///
6457 /// Emit the num_threads clause for directives such as 'target parallel' that
6458 /// have no associated teams construct.
6459 ///
6460 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                 CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  //
  // If the target directive is combined with a teams directive:
  //   Return the value in the thread_limit clause, if any.
  //
  // If the target directive is combined with a parallel directive:
  //   Return the value in the num_threads clause, if any.
  //
  // If both clauses are set, select the minimum of the two.
  //
  // If neither teams or parallel combined directives set the number of threads
  // in a team, return 0 to denote the runtime default.
  //
  // If this is not a teams directive return nullptr.

  if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
      isOpenMPParallelDirective(D.getDirectiveKind())) {
    // 0 tells the runtime to pick its own default.
    llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
    llvm::Value *NumThreadsVal = nullptr;
    llvm::Value *ThreadLimitVal = nullptr;

    if (const auto *ThreadLimitClause =
            D.getSingleClause<OMPThreadLimitClause>()) {
      // Evaluate the clause expression inside its own cleanup scope.
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      llvm::Value *ThreadLimit =
          CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
                             /*IgnoreResultAssign*/ true);
      ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
    }

    if (const auto *NumThreadsClause =
            D.getSingleClause<OMPNumThreadsClause>()) {
      // Evaluate the clause expression inside its own cleanup scope.
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      llvm::Value *NumThreads =
          CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                             /*IgnoreResultAssign*/ true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
    }

    // Select the lesser of thread_limit and num_threads (signed compare); if
    // only num_threads is present it is used directly.
    if (NumThreadsVal)
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;

    // Set default value passed to the runtime if either teams or a target
    // parallel type directive is found but no clause is specified.
    if (!ThreadLimitVal)
      ThreadLimitVal = DefaultThreadLimitVal;

    return ThreadLimitVal;
  }

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in a inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
        // The clause expression was captured by the target region; emit it
        // through the inner-expression capture info so captured variables
        // resolve correctly.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no thread_limit clause we
      // use the default value 0.
      return CGF.Builder.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}
6555 
6556 namespace {
6557 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6558 
6559 // Utility to handle information from clauses associated with a given
6560 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6561 // It provides a convenient interface to obtain the information and generate
6562 // code for that information.
6563 class MappableExprsHandler {
6564 public:
6565   /// Values for bit flags used to specify the mapping type for
6566   /// offloading.
6567   enum OpenMPOffloadMappingFlags : uint64_t {
6568     /// No flags
6569     OMP_MAP_NONE = 0x0,
6570     /// Allocate memory on the device and move data from host to device.
6571     OMP_MAP_TO = 0x01,
6572     /// Allocate memory on the device and move data from device to host.
6573     OMP_MAP_FROM = 0x02,
6574     /// Always perform the requested mapping action on the element, even
6575     /// if it was already mapped before.
6576     OMP_MAP_ALWAYS = 0x04,
6577     /// Delete the element from the device environment, ignoring the
6578     /// current reference count associated with the element.
6579     OMP_MAP_DELETE = 0x08,
6580     /// The element being mapped is a pointer-pointee pair; both the
6581     /// pointer and the pointee should be mapped.
6582     OMP_MAP_PTR_AND_OBJ = 0x10,
6583     /// This flags signals that the base address of an entry should be
6584     /// passed to the target kernel as an argument.
6585     OMP_MAP_TARGET_PARAM = 0x20,
6586     /// Signal that the runtime library has to return the device pointer
6587     /// in the current position for the data being mapped. Used when we have the
6588     /// use_device_ptr clause.
6589     OMP_MAP_RETURN_PARAM = 0x40,
6590     /// This flag signals that the reference being passed is a pointer to
6591     /// private data.
6592     OMP_MAP_PRIVATE = 0x80,
6593     /// Pass the element to the device by value.
6594     OMP_MAP_LITERAL = 0x100,
6595     /// Implicit map
6596     OMP_MAP_IMPLICIT = 0x200,
6597     /// The 16 MSBs of the flags indicate whether the entry is member of some
6598     /// struct/class.
6599     OMP_MAP_MEMBER_OF = 0xffff000000000000,
6600     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
6601   };
6602 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer value passed to the runtime.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none (i.e. the entry did not come from a use_device_ptr
    /// clause).
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference yields the raw base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
6619 
6620   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
6621   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
6622   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6623 
  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the whole struct; invalid until the first member of a
    /// partial struct map is recorded.
    Address Base = Address::invalid();
  };
6635 
6636 private:
  /// Information attached to a single map-clause expression: its component
  /// list, map type/modifier, and whether the runtime must return a device
  /// pointer for it (use_device_ptr) or the map was compiler-generated.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType),
          MapTypeModifier(MapTypeModifier),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
6654 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Member expression of the use_device_ptr operand.
    const Expr *IE = nullptr;
    /// Declaration of the device pointer.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
6665 
6666   /// Directive from where the map clauses were extracted.
6667   const OMPExecutableDirective &CurDir;
6668 
6669   /// Function the directive is being generated for.
6670   CodeGenFunction &CGF;
6671 
6672   /// Set of all first private variables in the current directive.
6673   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
6674 
6675   /// Map between device pointer declarations and their expression components.
6676   /// The key value for declarations in 'this' is null.
6677   llvm::DenseMap<
6678       const ValueDecl *,
6679       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6680       DevPointersMap;
6681 
6682   llvm::Value *getExprTypeSize(const Expr *E) const {
6683     QualType ExprTy = E->getType().getCanonicalType();
6684 
6685     // Reference types are ignored for mapping purposes.
6686     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6687       ExprTy = RefTy->getPointeeType().getCanonicalType();
6688 
6689     // Given that an array section is considered a built-in type, we need to
6690     // do the calculation based on the length of the section instead of relying
6691     // on CGF.getTypeSize(E->getType()).
6692     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6693       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6694                             OAE->getBase()->IgnoreParenImpCasts())
6695                             .getCanonicalType();
6696 
6697       // If there is no length associated with the expression, that means we
6698       // are using the whole length of the base.
6699       if (!OAE->getLength() && OAE->getColonLoc().isValid())
6700         return CGF.getTypeSize(BaseTy);
6701 
6702       llvm::Value *ElemSize;
6703       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6704         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6705       } else {
6706         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6707         assert(ATy && "Expecting array type if not a pointer type.");
6708         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6709       }
6710 
6711       // If we don't have a length at this point, that is because we have an
6712       // array section with a single element.
6713       if (!OAE->getLength())
6714         return ElemSize;
6715 
6716       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6717       LengthVal =
6718           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6719       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6720     }
6721     return CGF.getTypeSize(ExprTy);
6722   }
6723 
6724   /// Return the corresponding bits for a given map clause modifier. Add
6725   /// a flag marking the map as a pointer if requested. Add a flag marking the
6726   /// map as the first one of a series of maps that relate to the same map
6727   /// expression.
6728   OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType,
6729                                            OpenMPMapClauseKind MapTypeModifier,
6730                                            bool IsImplicit, bool AddPtrFlag,
6731                                            bool AddIsTargetParamFlag) const {
6732     OpenMPOffloadMappingFlags Bits =
6733         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
6734     switch (MapType) {
6735     case OMPC_MAP_alloc:
6736     case OMPC_MAP_release:
6737       // alloc and release is the default behavior in the runtime library,  i.e.
6738       // if we don't pass any bits alloc/release that is what the runtime is
6739       // going to do. Therefore, we don't need to signal anything for these two
6740       // type modifiers.
6741       break;
6742     case OMPC_MAP_to:
6743       Bits |= OMP_MAP_TO;
6744       break;
6745     case OMPC_MAP_from:
6746       Bits |= OMP_MAP_FROM;
6747       break;
6748     case OMPC_MAP_tofrom:
6749       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
6750       break;
6751     case OMPC_MAP_delete:
6752       Bits |= OMP_MAP_DELETE;
6753       break;
6754     case OMPC_MAP_always:
6755     case OMPC_MAP_unknown:
6756       llvm_unreachable("Unexpected map type!");
6757     }
6758     if (AddPtrFlag)
6759       Bits |= OMP_MAP_PTR_AND_OBJ;
6760     if (AddIsTargetParamFlag)
6761       Bits |= OMP_MAP_TARGET_PARAM;
6762     if (MapTypeModifier == OMPC_MAP_always)
6763       Bits |= OMP_MAP_ALWAYS;
6764     return Bits;
6765   }
6766 
6767   /// Return true if the provided expression is a final array section. A
6768   /// final array section, is one whose length can't be proved to be one.
6769   bool isFinalArraySectionExpression(const Expr *E) const {
6770     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6771 
6772     // It is not an array section and therefore not a unity-size one.
6773     if (!OASE)
6774       return false;
6775 
6776     // An array section with no colon always refer to a single element.
6777     if (OASE->getColonLoc().isInvalid())
6778       return false;
6779 
6780     const Expr *Length = OASE->getLength();
6781 
6782     // If we don't have a length we have to check if the array has size 1
6783     // for this dimension. Also, we should always expect a length if the
6784     // base type is pointer.
6785     if (!Length) {
6786       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6787                              OASE->getBase()->IgnoreParenImpCasts())
6788                              .getCanonicalType();
6789       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6790         return ATy->getSize().getSExtValue() != 1;
6791       // If we don't have a constant dimension length, we have to consider
6792       // the current section as having any size, so it is not necessarily
6793       // unitary. If it happen to be unity size, that's user fault.
6794       return true;
6795     }
6796 
6797     // Check if the length evaluates to 1.
6798     llvm::APSInt ConstLength;
6799     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
6800       return true; // Can have more that size 1.
6801 
6802     return ConstLength.getSExtValue() != 1;
6803   }
6804 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// Results are appended to \a BasePointers, \a Pointers, \a Sizes and
  /// \a Types (one entry per generated map), and \a PartialStruct is updated
  /// when members of a struct are mapped individually.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// \a OverlappedElements, when non-empty, lists component lists that overlap
  /// this one; the data is then emitted as bit-copies of the non-overlapped
  /// pieces only.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();

    if (isa<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        // 'declare target link' variables are mapped through the runtime's
        // link pointer rather than their own address.
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
          if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
            IsLink = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
          }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB points at the last byte of the element (size - 1 past LB).
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1, CharUnits::One());
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapTypeModifier, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                // Size covers [LB, start of the overlapped component).
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(Size);
            Types.push_back(Flags);
            // Next chunk starts right after the overlapped component.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1,
                                            CGF.getPointerSize());
          }
          // Final chunk: from the last overlapped component to the end of the
          // element (one past HB).
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One())
                      .getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(Size);
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(Size);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapTypeModifier, IsImplicit,
              !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7225 
7226   /// Return the adjusted map modifiers if the declaration a capture refers to
7227   /// appears in a first-private clause. This is expected to be used only with
7228   /// directives that start with 'target'.
7229   MappableExprsHandler::OpenMPOffloadMappingFlags
7230   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7231     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7232 
7233     // A first private variable captured by reference will use only the
7234     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7235     // declaration is known as first-private in this handler.
7236     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
7237       return MappableExprsHandler::OMP_MAP_PRIVATE |
7238              MappableExprsHandler::OMP_MAP_TO;
7239     return MappableExprsHandler::OMP_MAP_TO |
7240            MappableExprsHandler::OMP_MAP_FROM;
7241   }
7242 
7243   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7244     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7245     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7246                                                   << 48);
7247   }
7248 
7249   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7250                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7251     // If the entry is PTR_AND_OBJ but has not been marked with the special
7252     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7253     // marked as MEMBER_OF.
7254     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7255         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7256       return;
7257 
7258     // Reset the placeholder value to prepare the flag for the assignment of the
7259     // proper MEMBER_OF value.
7260     Flags &= ~OMP_MAP_MEMBER_OF;
7261     Flags |= MemberOfFlag;
7262   }
7263 
  /// Flatten the layout of record \a RD into an ordered list of field
  /// declarations, appended to \a Layout. Non-virtual bases, virtual bases
  /// and direct fields are placed at their LLVM struct-field positions and
  /// bases are expanded recursively, so the result is a plain field sequence
  /// in memory order. \a AsBase selects the base-subobject LLVM type of \a RD
  /// instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per LLVM struct element; each slot holds either a base class
    // or a field (slots for padding/bitfield storage stay null).
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A slot already claimed (e.g. by a non-virtual occurrence) wins.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField()) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected slots in order, recursing into base classes so only
    // FieldDecls end up in the output.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7323 
7324 public:
7325   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7326       : CurDir(Dir), CGF(CGF) {
7327     // Extract firstprivate clause information.
7328     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7329       for (const auto *D : C->varlists())
7330         FirstPrivateDecls.insert(
7331             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7332     // Extract device pointer clause information.
7333     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7334       for (auto L : C->component_lists())
7335         DevPointersMap[L.first].push_back(L.second);
7336   }
7337 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    // Both bounds are cast to i8* so the pointer difference is in bytes.
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element: the runtime receives
    // the combined entry as the target parameter instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
7372 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect 'map' clause lists with their explicit map type and modifier.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'to' motion clauses behave like map(to:) with no modifier.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'from' motion clauses behave like map(from:) with no modifier.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
        this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the device pointer entry right away,
          // loading the current pointer value as the section pointer.
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Emit the collected component lists, one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
7551 
  /// Emit capture info for lambdas for variables captured by reference.
  /// For each by-reference capture (including 'this') of the lambda object
  /// \a VD (passed as \a Arg), a PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT entry
  /// is appended, and \a LambdaPointers records the mapping from the capture
  /// field address to the lambda object address so the MEMBER_OF indices can
  /// be fixed up later by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closures are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer through the closure field.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      // By-copy captures are already part of the closure object itself.
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      // The size of the referenced object, not the reference itself.
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
7598 
  /// Set correct indices for lambdas captures.
  /// Entries produced by generateInfoForLambdaCaptures carry a placeholder
  /// MEMBER_OF; here each such entry is rewired to the index of the entry
  /// that maps its enclosing lambda object.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Only entries with exactly this flag combination were emitted for
      // lambda captures; everything else is skipped.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards from this capture for the entry whose section pointer
      // is the lambda object itself; that entry is the MEMBER_OF parent.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
7627 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The declaration this capture refers to; 'this' captures map to null.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Gather every map-clause component list that refers to this declaration,
    // together with its map type, modifier and implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, OpenMPMapClauseKind, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifier(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      ++Count;
      // Compare against every later list; only the component lists are used
      // below, the other tied variables are just destructuring slots.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapTypeModifier, IsImplicit) = L1;
        // Walk both lists back-to-front (base towards the most derived
        // component) until they diverge.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter list is the base; the longer one is a sub-section of
          // it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order sub-sections by declaration order of the fields they touch so
      // the base element can be emitted in memory order.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different records: whichever appears first in the
            // flattened layout is the lesser.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Emit the map information for entries that have overlapped components,
    // passing the sorted sub-sections along so the base element is mapped
    // around them.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapTypeModifier, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapTypeModifier, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
7801 
7802   /// Generate the base pointers, section pointers, sizes and map types
7803   /// associated with the declare target link variables.
7804   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
7805                                         MapValuesArrayTy &Pointers,
7806                                         MapValuesArrayTy &Sizes,
7807                                         MapFlagsArrayTy &Types) const {
7808     // Map other list items in the map clause which are not captured variables
7809     // but "declare target link" global variables.,
7810     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
7811       for (const auto &L : C->component_lists()) {
7812         if (!L.first)
7813           continue;
7814         const auto *VD = dyn_cast<VarDecl>(L.first);
7815         if (!VD)
7816           continue;
7817         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7818             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
7819         if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
7820           continue;
7821         StructRangeInfoTy PartialStruct;
7822         generateInfoForComponentList(
7823             C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
7824             Pointers, Sizes, Types, PartialStruct,
7825             /*IsFirstComponentList=*/true, C->isImplicit());
7826         assert(!PartialStruct.Base.isValid() &&
7827                "No partial structs for declare target link expected.");
7828       }
7829     }
7830   }
7831 
7832   /// Generate the default map information for a given capture \a CI,
7833   /// record field declaration \a RI and captured value \a CV.
7834   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
7835                               const FieldDecl &RI, llvm::Value *CV,
7836                               MapBaseValuesArrayTy &CurBasePointers,
7837                               MapValuesArrayTy &CurPointers,
7838                               MapValuesArrayTy &CurSizes,
7839                               MapFlagsArrayTy &CurMapTypes) const {
7840     // Do the default mapping.
7841     if (CI.capturesThis()) {
7842       CurBasePointers.push_back(CV);
7843       CurPointers.push_back(CV);
7844       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
7845       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
7846       // Default map type.
7847       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
7848     } else if (CI.capturesVariableByCopy()) {
7849       CurBasePointers.push_back(CV);
7850       CurPointers.push_back(CV);
7851       if (!RI.getType()->isAnyPointerType()) {
7852         // We have to signal to the runtime captures passed by value that are
7853         // not pointers.
7854         CurMapTypes.push_back(OMP_MAP_LITERAL);
7855         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
7856       } else {
7857         // Pointers are implicitly mapped with a zero size and no flags
7858         // (other than first map that is added for all implicit maps).
7859         CurMapTypes.push_back(OMP_MAP_NONE);
7860         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
7861       }
7862     } else {
7863       assert(CI.capturesVariable() && "Expected captured reference.");
7864       CurBasePointers.push_back(CV);
7865       CurPointers.push_back(CV);
7866 
7867       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
7868       QualType ElementType = PtrTy->getPointeeType();
7869       CurSizes.push_back(CGF.getTypeSize(ElementType));
7870       // The default map type for a scalar/complex type is 'to' because by
7871       // default the value doesn't have to be retrieved. For an aggregate
7872       // type, the default is 'tofrom'.
7873       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
7874     }
7875     // Every default map produces a single argument which is a target parameter.
7876     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
7877 
7878     // Add flag stating this is an implicit map.
7879     CurMapTypes.back() |= OMP_MAP_IMPLICIT;
7880   }
7881 };
7882 
/// Reserved device IDs with special meaning to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
7888 } // anonymous namespace
7889 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills \p Info with: the base-pointers and pointers arrays (always stack
/// temporaries filled at run time), the sizes array (a private constant
/// global when all sizes are compile-time constants, a runtime-filled
/// temporary otherwise), and the map-types array (always a constant global).
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and pointers always hold runtime values, so they are
    // emitted as void*[NumberOfPtrs] stack temporaries filled below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      // Only the contents matter, not the symbol identity, so the global may
      // be merged with identical ones.
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer and pointer arrays element by element, and the
    // sizes array too when it could not be emitted as a constant above.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to the stored value's own pointer type for the store.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where this declaration's base pointer was stored so later
      // device-pointer handling can retrieve it (e.g. use_device_ptr).
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        // The runtime expects size_t elements; widen/truncate accordingly.
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8006 /// Emit the arguments to be passed to the runtime library based on the
8007 /// arrays of pointers, sizes and map types.
8008 static void emitOffloadingArraysArgument(
8009     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8010     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8011     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8012   CodeGenModule &CGM = CGF.CGM;
8013   if (Info.NumberOfPtrs) {
8014     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8015         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8016         Info.BasePointersArray,
8017         /*Idx0=*/0, /*Idx1=*/0);
8018     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8019         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8020         Info.PointersArray,
8021         /*Idx0=*/0,
8022         /*Idx1=*/0);
8023     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8024         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
8025         /*Idx0=*/0, /*Idx1=*/0);
8026     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8027         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8028         Info.MapTypesArray,
8029         /*Idx0=*/0,
8030         /*Idx1=*/0);
8031   } else {
8032     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8033     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8034     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
8035     MapTypesArrayArg =
8036         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8037   }
8038 }
8039 
/// Emit the launch of the target region of \p D through the offloading
/// runtime (__tgt_target* family), falling back to calling the host version
/// \p OutlinedFn when no device ID exists (\p OutlinedFnID is null), when the
/// 'if' clause evaluates to false, or when the offload call reports failure.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause requires wrapping the target call in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the captured variables inside an inlined region first; the
  // codegen callbacks below all read CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and
  // consumed by ThenGen when the actual runtime call is emitted.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick per the spec.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value means the offload did not run on the device.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captures must be re-materialized in the
      // task's own frame before calling the host version.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // See the matching comment in ThenGen: re-materialize the captures in
      // the task frame.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays/arguments and then runs ThenGen (directly or
  // wrapped in a task when a depend clause is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures, record fields and captured values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results for ThenGen (captured by reference above).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path, optionally wrapped in a task for depend clauses.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8310 
/// Recursively scan \p S for target directives and emit device functions for
/// each one found, using \p ParentName to construct unique kernel names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple uniquely identifies this target region
    // within its parent function.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch on the exact target directive kind. The switch deliberately
    // lists every directive so that adding a new one triggers a -Wswitch
    // warning here.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the following are target execution directives, so reaching
    // them here indicates a bug in the caller's classification above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For non-target executable directives only the associated statement can
  // contain nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
8446 
8447 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
8448   // If emitting code for the host, we do not process FD here. Instead we do
8449   // the normal code generation.
8450   if (!CGM.getLangOpts().OpenMPIsDevice)
8451     return false;
8452 
8453   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
8454   StringRef Name = CGM.getMangledName(GD);
8455   // Try to detect target regions in the function.
8456   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
8457     scanForTargetRegionsFunctions(FD->getBody(), Name);
8458 
8459   // Do not to emit function if it is not marked as declare target.
8460   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
8461          AlreadyEmittedTargetFunctions.count(Name) == 0;
8462 }
8463 
8464 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
8465   if (!CGM.getLangOpts().OpenMPIsDevice)
8466     return false;
8467 
8468   // Check if there are Ctors/Dtors in this declaration and look for target
8469   // regions in it. We use the complete variant to produce the kernel name
8470   // mangling.
8471   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
8472   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
8473     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
8474       StringRef ParentName =
8475           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
8476       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
8477     }
8478     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
8479       StringRef ParentName =
8480           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
8481       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
8482     }
8483   }
8484 
8485   // Do not to emit variable if it is not marked as declare target.
8486   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8487       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
8488           cast<VarDecl>(GD.getDecl()));
8489   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
8490     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
8491     return true;
8492   }
8493   return false;
8494 }
8495 
8496 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
8497                                                    llvm::Constant *Addr) {
8498   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8499       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8500   if (!Res) {
8501     if (CGM.getLangOpts().OpenMPIsDevice) {
8502       // Register non-target variables being emitted in device code (debug info
8503       // may cause this).
8504       StringRef VarName = CGM.getMangledName(VD);
8505       EmittedNonTargetVariables.try_emplace(VarName, Addr);
8506     }
8507     return;
8508   }
8509   // Register declare target variables.
8510   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
8511   StringRef VarName;
8512   CharUnits VarSize;
8513   llvm::GlobalValue::LinkageTypes Linkage;
8514   switch (*Res) {
8515   case OMPDeclareTargetDeclAttr::MT_To:
8516     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
8517     VarName = CGM.getMangledName(VD);
8518     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
8519       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
8520       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
8521     } else {
8522       VarSize = CharUnits::Zero();
8523     }
8524     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
8525     // Temp solution to prevent optimizations of the internal variables.
8526     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
8527       std::string RefName = getName({VarName, "ref"});
8528       if (!CGM.GetGlobalValue(RefName)) {
8529         llvm::Constant *AddrRef =
8530             getOrCreateInternalVariable(Addr->getType(), RefName);
8531         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
8532         GVAddrRef->setConstant(/*Val=*/true);
8533         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
8534         GVAddrRef->setInitializer(Addr);
8535         CGM.addCompilerUsedGlobal(GVAddrRef);
8536       }
8537     }
8538     break;
8539   case OMPDeclareTargetDeclAttr::MT_Link:
8540     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
8541     if (CGM.getLangOpts().OpenMPIsDevice) {
8542       VarName = Addr->getName();
8543       Addr = nullptr;
8544     } else {
8545       VarName = getAddrOfDeclareTargetLink(VD).getName();
8546       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
8547     }
8548     VarSize = CGM.getPointerSize();
8549     Linkage = llvm::GlobalValue::WeakAnyLinkage;
8550     break;
8551   }
8552   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
8553       VarName, Addr, VarSize, Flags, Linkage);
8554 }
8555 
8556 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
8557   if (isa<FunctionDecl>(GD.getDecl()) ||
8558       isa<OMPDeclareReductionDecl>(GD.getDecl()))
8559     return emitTargetFunctions(GD);
8560 
8561   return emitTargetGlobalVariable(GD);
8562 }
8563 
8564 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
8565   for (const VarDecl *VD : DeferredGlobalVariables) {
8566     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8567         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8568     if (!Res)
8569       continue;
8570     if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
8571       CGM.EmitGlobal(VD);
8572     } else {
8573       assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
8574              "Expected to or link clauses.");
8575       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
8576     }
8577   }
8578 }
8579 
8580 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
8581     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
8582   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
8583          " Expected target-based directive.");
8584 }
8585 
8586 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
8587     CodeGenModule &CGM)
8588     : CGM(CGM) {
8589   if (CGM.getLangOpts().OpenMPIsDevice) {
8590     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
8591     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
8592   }
8593 }
8594 
8595 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
8596   if (CGM.getLangOpts().OpenMPIsDevice)
8597     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
8598 }
8599 
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or marking explicitly disabled: nothing to track.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      // A global with this mangled name may already exist in the module;
      // report true only if it is a full definition, not a declaration.
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record this function as emitted; the insert fails (second == false)
  // exactly when it was already in the set.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
8619 
8620 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
8621   // If we have offloading in the current module, we need to emit the entries
8622   // now and register the offloading descriptor.
8623   createOffloadEntriesAndInfoMetadata();
8624 
8625   // Create and register the offloading binary descriptors. This is the main
8626   // entity that captures all the information about offloading in the current
8627   // compilation unit.
8628   return createOffloadingBinaryDescriptorRegistration();
8629 }
8630 
8631 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
8632                                     const OMPExecutableDirective &D,
8633                                     SourceLocation Loc,
8634                                     llvm::Value *OutlinedFn,
8635                                     ArrayRef<llvm::Value *> CapturedVars) {
8636   if (!CGF.HaveInsertPoint())
8637     return;
8638 
8639   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
8640   CodeGenFunction::RunCleanupsScope Scope(CGF);
8641 
8642   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
8643   llvm::Value *Args[] = {
8644       RTLoc,
8645       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
8646       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
8647   llvm::SmallVector<llvm::Value *, 16> RealArgs;
8648   RealArgs.append(std::begin(Args), std::end(Args));
8649   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
8650 
8651   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
8652   CGF.EmitRuntimeCall(RTLFn, RealArgs);
8653 }
8654 
8655 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
8656                                          const Expr *NumTeams,
8657                                          const Expr *ThreadLimit,
8658                                          SourceLocation Loc) {
8659   if (!CGF.HaveInsertPoint())
8660     return;
8661 
8662   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
8663 
8664   llvm::Value *NumTeamsVal =
8665       NumTeams
8666           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
8667                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
8668           : CGF.Builder.getInt32(0);
8669 
8670   llvm::Value *ThreadLimitVal =
8671       ThreadLimit
8672           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
8673                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
8674           : CGF.Builder.getInt32(0);
8675 
8676   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
8677   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
8678                                      ThreadLimitVal};
8679   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
8680                       PushNumTeamsArgs);
8681 }
8682 
/// Emits the matched pair of __tgt_target_data_begin / __tgt_target_data_end
/// runtime calls around the body of a 'target data' region, honoring the
/// 'if' and 'device' clauses. The offloading arrays built for the opening
/// call are stored in \p Info so the closing call can reference them.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // BeginThenGen must have populated Info before this runs.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
8809 
/// Emits the runtime call for a standalone data-movement directive:
/// 'target enter data', 'target exit data', or 'target update'. Handles the
/// 'if', 'device', 'nowait', and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are captured by reference: TargetThenGen fills
  // them in before ThenGen (which reads them) is invoked.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds are listed explicitly (rather than
    // 'default') so adding a new kind produces a -Wswitch warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the generated arrays to the captured locals read by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a 'depend' clause the call is wrapped in a task; otherwise it is
    // emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // 'if' clause false: the directive is a no-op, so the else branch is
    // empty.
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
8960 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Parameter kind; Vector is the default when no clause names the
    /// parameter.
    ParamKindTy Kind = Vector;
    /// For Linear: the constant step. For LinearWithVarStride: the position
    /// of the parameter that holds the stride.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause (or the target default); zero
    /// when the parameter is not aligned.
    llvm::APSInt Alignment;
  };
} // namespace
8971 
8972 static unsigned evaluateCDTSize(const FunctionDecl *FD,
8973                                 ArrayRef<ParamAttrTy> ParamAttrs) {
8974   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
8975   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
8976   // of that clause. The VLEN value must be power of 2.
8977   // In other case the notion of the function`s "characteristic data type" (CDT)
8978   // is used to compute the vector length.
8979   // CDT is defined in the following order:
8980   //   a) For non-void function, the CDT is the return type.
8981   //   b) If the function has any non-uniform, non-linear parameters, then the
8982   //   CDT is the type of the first such parameter.
8983   //   c) If the CDT determined by a) or b) above is struct, union, or class
8984   //   type which is pass-by-value (except for the type that maps to the
8985   //   built-in complex data type), the characteristic data type is int.
8986   //   d) If none of the above three cases is applicable, the CDT is int.
8987   // The VLEN is then determined based on the CDT and the size of vector
8988   // register of that ISA for which current vector version is generated. The
8989   // VLEN is computed using the formula below:
8990   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
8991   // where vector register size specified in section 3.2.1 Registers and the
8992   // Stack Frame of original AMD64 ABI document.
8993   QualType RetType = FD->getReturnType();
8994   if (RetType.isNull())
8995     return 0;
8996   ASTContext &C = FD->getASTContext();
8997   QualType CDT;
8998   if (!RetType.isNull() && !RetType->isVoidType()) {
8999     CDT = RetType;
9000   } else {
9001     unsigned Offset = 0;
9002     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9003       if (ParamAttrs[Offset].Kind == Vector)
9004         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9005       ++Offset;
9006     }
9007     if (CDT.isNull()) {
9008       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9009         if (ParamAttrs[I + Offset].Kind == Vector) {
9010           CDT = FD->getParamDecl(I)->getType();
9011           break;
9012         }
9013       }
9014     }
9015   }
9016   if (CDT.isNull())
9017     CDT = C.IntTy;
9018   CDT = CDT->getCanonicalTypeUnqualified();
9019   if (CDT->isRecordType() || CDT->isUnionType())
9020     CDT = C.IntTy;
9021   return C.getTypeSize(CDT);
9022 }
9023 
9024 static void
9025 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9026                            const llvm::APSInt &VLENVal,
9027                            ArrayRef<ParamAttrTy> ParamAttrs,
9028                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9029   struct ISADataTy {
9030     char ISA;
9031     unsigned VecRegSize;
9032   };
9033   ISADataTy ISAData[] = {
9034       {
9035           'b', 128
9036       }, // SSE
9037       {
9038           'c', 256
9039       }, // AVX
9040       {
9041           'd', 256
9042       }, // AVX2
9043       {
9044           'e', 512
9045       }, // AVX512
9046   };
9047   llvm::SmallVector<char, 2> Masked;
9048   switch (State) {
9049   case OMPDeclareSimdDeclAttr::BS_Undefined:
9050     Masked.push_back('N');
9051     Masked.push_back('M');
9052     break;
9053   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9054     Masked.push_back('N');
9055     break;
9056   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9057     Masked.push_back('M');
9058     break;
9059   }
9060   for (char Mask : Masked) {
9061     for (const ISADataTy &Data : ISAData) {
9062       SmallString<256> Buffer;
9063       llvm::raw_svector_ostream Out(Buffer);
9064       Out << "_ZGV" << Data.ISA << Mask;
9065       if (!VLENVal) {
9066         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
9067                                          evaluateCDTSize(FD, ParamAttrs));
9068       } else {
9069         Out << VLENVal;
9070       }
9071       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9072         switch (ParamAttr.Kind){
9073         case LinearWithVarStride:
9074           Out << 's' << ParamAttr.StrideOrArg;
9075           break;
9076         case Linear:
9077           Out << 'l';
9078           if (!!ParamAttr.StrideOrArg)
9079             Out << ParamAttr.StrideOrArg;
9080           break;
9081         case Uniform:
9082           Out << 'u';
9083           break;
9084         case Vector:
9085           Out << 'v';
9086           break;
9087         }
9088         if (!!ParamAttr.Alignment)
9089           Out << 'a' << ParamAttr.Alignment;
9090       }
9091       Out << '_' << Fn->getName();
9092       Fn->addFnAttr(Out.str());
9093     }
9094   }
9095 }
9096 
9097 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
9098                                               llvm::Function *Fn) {
9099   ASTContext &C = CGM.getContext();
9100   FD = FD->getMostRecentDecl();
9101   // Map params to their positions in function decl.
9102   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
9103   if (isa<CXXMethodDecl>(FD))
9104     ParamPositions.try_emplace(FD, 0);
9105   unsigned ParamPos = ParamPositions.size();
9106   for (const ParmVarDecl *P : FD->parameters()) {
9107     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
9108     ++ParamPos;
9109   }
9110   while (FD) {
9111     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
9112       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
9113       // Mark uniform parameters.
9114       for (const Expr *E : Attr->uniforms()) {
9115         E = E->IgnoreParenImpCasts();
9116         unsigned Pos;
9117         if (isa<CXXThisExpr>(E)) {
9118           Pos = ParamPositions[FD];
9119         } else {
9120           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9121                                 ->getCanonicalDecl();
9122           Pos = ParamPositions[PVD];
9123         }
9124         ParamAttrs[Pos].Kind = Uniform;
9125       }
9126       // Get alignment info.
9127       auto NI = Attr->alignments_begin();
9128       for (const Expr *E : Attr->aligneds()) {
9129         E = E->IgnoreParenImpCasts();
9130         unsigned Pos;
9131         QualType ParmTy;
9132         if (isa<CXXThisExpr>(E)) {
9133           Pos = ParamPositions[FD];
9134           ParmTy = E->getType();
9135         } else {
9136           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9137                                 ->getCanonicalDecl();
9138           Pos = ParamPositions[PVD];
9139           ParmTy = PVD->getType();
9140         }
9141         ParamAttrs[Pos].Alignment =
9142             (*NI)
9143                 ? (*NI)->EvaluateKnownConstInt(C)
9144                 : llvm::APSInt::getUnsigned(
9145                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
9146                           .getQuantity());
9147         ++NI;
9148       }
9149       // Mark linear parameters.
9150       auto SI = Attr->steps_begin();
9151       auto MI = Attr->modifiers_begin();
9152       for (const Expr *E : Attr->linears()) {
9153         E = E->IgnoreParenImpCasts();
9154         unsigned Pos;
9155         if (isa<CXXThisExpr>(E)) {
9156           Pos = ParamPositions[FD];
9157         } else {
9158           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9159                                 ->getCanonicalDecl();
9160           Pos = ParamPositions[PVD];
9161         }
9162         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
9163         ParamAttr.Kind = Linear;
9164         if (*SI) {
9165           if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
9166                                     Expr::SE_AllowSideEffects)) {
9167             if (const auto *DRE =
9168                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
9169               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
9170                 ParamAttr.Kind = LinearWithVarStride;
9171                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
9172                     ParamPositions[StridePVD->getCanonicalDecl()]);
9173               }
9174             }
9175           }
9176         }
9177         ++SI;
9178         ++MI;
9179       }
9180       llvm::APSInt VLENVal;
9181       if (const Expr *VLEN = Attr->getSimdlen())
9182         VLENVal = VLEN->EvaluateKnownConstInt(C);
9183       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
9184       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
9185           CGM.getTriple().getArch() == llvm::Triple::x86_64)
9186         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
9187     }
9188     FD = FD->getPreviousDecl();
9189   }
9190 }
9191 
9192 namespace {
9193 /// Cleanup action for doacross support.
9194 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
9195 public:
9196   static const int DoacrossFinArgs = 2;
9197 
9198 private:
9199   llvm::Value *RTLFn;
9200   llvm::Value *Args[DoacrossFinArgs];
9201 
9202 public:
9203   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
9204       : RTLFn(RTLFn) {
9205     assert(CallArgs.size() == DoacrossFinArgs);
9206     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
9207   }
9208   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
9209     if (!CGF.HaveInsertPoint())
9210       return;
9211     CGF.EmitRuntimeCall(RTLFn, Args);
9212   }
9213 };
9214 } // namespace
9215 
/// Emits __kmpc_doacross_init with one kmp_dim descriptor per loop dimension
/// and schedules the matching __kmpc_doacross_fini call as an EH cleanup.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim per collapsed loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  // Zero-initialize so 'lo' stays 0 for every dimension.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal =
        CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
                               DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
                           KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder
              .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy))
              .getPointer(),
          CGM.VoidPtrTy)};

  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the finalization call so it runs on both normal and EH exits
  // from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
9289 
/// Emits __kmpc_doacross_post (for 'depend(source)') or __kmpc_doacross_wait
/// (for 'depend(sink)') with the loop counter values of the clause converted
/// to a kmp_int64 array.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, ArrayType::Normal, 0);
  // Temporary array holding one counter value per loop.
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Convert each counter to kmp_int64 before storing it.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(
        CntVal,
        CGF.Builder.CreateConstArrayGEP(
            CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
        /*Volatile=*/false, Int64Ty);
  }
  // Arguments: location, thread id, pointer to the counter array.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder
          .CreateConstArrayGEP(CntAddr, 0,
                               CGM.getContext().getTypeSizeInChars(Int64Ty))
          .getPointer()};
  llvm::Value *RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
9326 
9327 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
9328                                llvm::Value *Callee,
9329                                ArrayRef<llvm::Value *> Args) const {
9330   assert(Loc.isValid() && "Outlined function call location must be valid.");
9331   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
9332 
9333   if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
9334     if (Fn->doesNotThrow()) {
9335       CGF.EmitNounwindRuntimeCall(Fn, Args);
9336       return;
9337     }
9338   }
9339   CGF.EmitRuntimeCall(Callee, Args);
9340 }
9341 
/// Default lowering of a call to an OpenMP outlined function: delegates
/// directly to emitCall with no target-specific argument translation.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
9347 
/// Default implementation: returns the local address of the native
/// parameter. \p TargetParam is ignored here; presumably device-specific
/// runtimes override this to map between the two parameter forms.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
9353 
/// Default implementation performs no special allocation for local
/// variables: returns an invalid Address so the caller falls back to its
/// normal local-variable lowering.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  return Address::invalid();
}
9358 
// 'parallel' regions are never outlined in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9364 
// 'teams' regions are never outlined in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9370 
// 'task' regions are never outlined in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9378 
// No runtime calls for 'parallel' are emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Value *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9386 
// 'critical' regions are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9393 
// 'master' regions are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9399 
// 'taskyield' has no lowering in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9404 
// 'taskgroup' regions are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9410 
// 'single' regions (and their copyprivate handling) are not lowered in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9418 
// 'ordered' regions are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9425 
// Barriers are never emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9433 
// Dynamic (dispatch-based) worksharing loops are not lowered in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9440 
// Static worksharing-loop initialization is not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9446 
// 'distribute' loop initialization is not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9452 
// Ordered-iteration finalization is not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9459 
// Static worksharing-loop finalization is not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9465 
// Dynamic-loop chunk fetching is not lowered in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9473 
// 'num_threads' clauses have no lowering in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9479 
// 'proc_bind' clauses have no lowering in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9485 
// Threadprivate variables are not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9492 
// Threadprivate variable definitions are not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9498 
// Artificial threadprivate storage is not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9503 
// 'flush' has no lowering in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9509 
// 'task' directives are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9518 
// 'taskloop' directives are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9525 
/// In SIMD-only mode only "simple" reductions are expected (enforced by the
/// assert); the actual codegen is delegated to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
9534 
// Task reductions are not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9540 
// Task reductions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9547 
// Task reductions are not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9554 
// 'taskwait' has no lowering in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9559 
// 'cancellation point' is not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9565 
// 'cancel' is not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9571 
// 'target' regions are never outlined in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9578 
// Offloading calls for 'target' are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Value *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond, const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9586 
// Target-device function emission is not supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9590 
// Target-device global-variable emission is not supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9594 
// Always returns false: presumably indicating this runtime performed no
// target-specific handling of the global — confirm against callers.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
9598 
// Returns nullptr: no offload registration function is produced in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
9602 
// Runtime calls for 'teams' are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Value *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9610 
// 'num_teams'/'thread_limit' clauses have no lowering in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9617 
// 'target data' regions are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9623 
// Standalone target-data directives (e.g. enter/exit data, update) are not
// lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9629 
// Doacross loop initialization is not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9635 
// Doacross 'ordered' dependences are not lowered in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9640 
// Parameter translation for outlined functions is not supported in
// SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9646 
// Parameter address mapping is not supported in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9653 
9654