1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "clang/CodeGen/ConstantInitBuilder.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/Basic/BitmaskEnum.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/Bitcode/BitcodeReader.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <cassert>
32 
33 using namespace clang;
34 using namespace CodeGen;
35 
36 namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Instances are installed as CodeGenFunction::CapturedStmtInfo while an
/// OpenMP construct is emitted; subclasses specialize the outlined
/// ('parallel', 'task', 'target') and inlined cases.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by the captured statement \p CS.
  /// \param RegionKind Kind of the OpenMP region.
  /// \param CodeGen Callback that emits the code of the region body.
  /// \param Kind OpenMP directive that produced the region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used by inlined
  /// regions, which reuse the enclosing function's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a switching point for an untied task. Default implementation is a
  /// no-op; overridden for task regions (see CGOpenMPTaskOutlinedRegionInfo).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of the region (outlined parallel/task/target or inlined).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that produced this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: every CGOpenMPRegionInfo reports capture-region kind
  /// CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code of the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that produced this region.
  OpenMPDirectiveKind Kind;
  /// Whether the region may contain a 'cancel' directive.
  bool HasCancel;
};
98 
/// API for captured statement code generation in OpenMP constructs.
/// Region info for the outlined function of a standalone 'parallel' region.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS Captured statement of the region body.
  /// \param ThreadIDVar Variable/parameter holding the global thread id;
  ///        must not be null.
  /// \param CodeGen Callback emitting the body of the region.
  /// \param Kind OpenMP directive kind that created this region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  /// \param HelperName Name to use for the generated outlined helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the generated outlined helper function.
  StringRef HelperName;
};
131 
/// API for captured statement code generation in OpenMP constructs.
/// Region info for the outlined function of a standalone 'task' region.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// PrePostActionTy implementing the re-entry logic needed for 'untied'
  /// tasks: a switch on the task's part id is emitted on entry, and each
  /// switching point registers a new case so the task can resume where it
  /// previously suspended.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: constructor receives 'Tied' and negates).
    bool Untied;
    /// Parameter holding a pointer to the task's current part id.
    const VarDecl *PartIDVar;
    /// Extra codegen executed at every switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch on the part id; created lazily in Enter().
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    /// \param Tied whether the task is tied; stored negated as Untied.
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        // Load the current part id and switch on it; the default target
        // (.untied.done.) just leaves the task via the return block.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0: initial entry into the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next case number into the part
    /// id, run UntiedCodeGen, exit the task, and register the continuation
    /// block as a new case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Record which case the task should resume at next time.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts (switch cases) generated so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param CS Captured statement of the task body.
  /// \param ThreadIDVar Variable/parameter holding the global thread id;
  ///        must not be null.
  /// \param CodeGen Callback emitting the body of the task region.
  /// \param Kind OpenMP directive kind that created this region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  /// \param Action Untied-task action shared with the task codegen.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the shared untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
220 
/// API for inlined captured statement code generation in OpenMP
/// constructs.
/// Wraps the CGCapturedStmtInfo that was active before the inlined region
/// started and delegates most queries to the enclosing OpenMP region, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info active before this region (may be
  ///        null, or a non-OpenMP info).
  /// \param CodeGen Callback emitting the body of the inlined region.
  /// \param Kind OpenMP directive kind that created this region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  // Store the value of the context parameter in the enclosing region.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Field decl of the captured 'this', delegated to the enclosing region.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other delegating overrides, this checks the
  /// raw OldCSI (the local shadows the member OuterRegionInfo), so it also
  /// delegates to a non-OpenMP enclosing CGCapturedStmtInfo — confirm this
  /// asymmetry is intentional.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied-task switching to the enclosing region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info; null if OldCSI is null or not
  /// an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
303 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS Captured statement of the target region body.
  /// \param CodeGen Callback emitting the body of the region.
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, provided by the client.
  StringRef HelperName;
};
332 
/// RegionCodeGenTy callback for regions that must never emit a body; used by
/// CGOpenMPInnerExprInfo, which only captures expressions.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  /// \param CGF Function the expression is emitted in.
  /// \param CS Captured statement listing the variables used by the
  ///        expression.
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters are already directly usable; only non-local
      // variables need privatization.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      // NOTE(review): the lambda captures DRE by reference; this assumes
      // addPrivate evaluates it before DRE dies at the end of this loop
      // iteration — confirm against OMPPrivateScope::addPrivate.
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  /// Falls back to nullptr ("use the variable directly") when no enclosing
  /// region captured \p VD.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never matches in region-kind RTTI.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
395 
396 /// RAII for emitting code of OpenMP constructs.
397 class InlinedOpenMPRegionRAII {
398   CodeGenFunction &CGF;
399   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
400   FieldDecl *LambdaThisCaptureField = nullptr;
401   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
402 
403 public:
404   /// Constructs region for combined constructs.
405   /// \param CodeGen Code generation sequence for combined directives. Includes
406   /// a list of functions used for code generation of implicitly inlined
407   /// regions.
408   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
409                           OpenMPDirectiveKind Kind, bool HasCancel)
410       : CGF(CGF) {
411     // Start emission for the construct.
412     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
413         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
414     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
415     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
416     CGF.LambdaThisCaptureField = nullptr;
417     BlockInfo = CGF.BlockInfo;
418     CGF.BlockInfo = nullptr;
419   }
420 
421   ~InlinedOpenMPRegionRAII() {
422     // Restore original CapturedStmtInfo only if we're done with code emission.
423     auto *OldCSI =
424         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
425     delete CGF.CapturedStmtInfo;
426     CGF.CapturedStmtInfo = OldCSI;
427     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
429     CGF.BlockInfo = BlockInfo;
430   }
431 };
432 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// These are ABI values shared with the OpenMP runtime; do not renumber.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// (Same value as OMP_IDENT_BARRIER_IMPL.)
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /// Enables bitwise operators on this enum (see llvm/ADT/BitmaskEnum.h).
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
461 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices into the ident_t structure described above; the order must
/// match the struct layout.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
502 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
/// These are ABI values shared with the OpenMP runtime; do not renumber.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  /// Each OMP_ord_* value below is its OMP_sch_* counterpart plus 32.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These occupy high bits, presumably OR-ed with a base schedule value —
  /// confirm against kmp.h sched_type.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
534 
/// Identifiers for the OpenMP runtime library entry points (host runtime
/// __kmpc_* calls and offloading __tgt_* calls) that may be emitted by this
/// file. Each enumerator's comment gives the C signature of the call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
718 
719 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
720 /// region.
721 class CleanupTy final : public EHScopeStack::Cleanup {
722   PrePostActionTy *Action;
723 
724 public:
725   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
726   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
727     if (!CGF.HaveInsertPoint())
728       return;
729     Action->Exit(CGF);
730   }
731 };
732 
733 } // anonymous namespace
734 
// Emit the region's code through the stored callback, running any registered
// pre|post action around it.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  // All cleanups pushed while emitting the region are popped on scope exit.
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Register the action's Exit() as a cleanup BEFORE emitting the body so
    // it runs on both normal and exceptional exits from the region.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No action was attached: pass a default no-op action so the callback
    // signature stays uniform.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
745 
746 /// Check if the combiner is a call to UDR combiner and if it is so return the
747 /// UDR decl used for reduction.
748 static const OMPDeclareReductionDecl *
749 getReductionInit(const Expr *ReductionOp) {
750   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
751     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
752       if (const auto *DRE =
753               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
754         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
755           return DRD;
756   return nullptr;
757 }
758 
/// Initialize \p Private either with the user-defined reduction's initializer
/// (when \p DRD declares one) or with the null constant of type \p Ty.
/// \param InitOp The initializer call expression from the reduction clause.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR declares an explicit initializer; fetch the emitted
    // {combiner, initializer} pair for it.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the call's first argument to the private copy and the second to
    // the original variable, then emit the call with the real initializer
    // function substituted for the opaque callee.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Reduction.second is the initializer function.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize the zero value of Ty in a private
    // constant global and copy it into the private variable.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the zero value using the strategy matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
810 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element with the
/// user-defined reduction initializer \p Init instead of a plain expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl used for the element initializer, or
/// null when no UDR is involved.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    // SrcAddr is only needed (and only valid) when a UDR initializer reads
    // the original element.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups to a single iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): this GEP advances the *source* pointer but carries the IR
    // name "omp.arraycpy.dest.element" — cosmetic only, does not affect
    // semantics.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
899 
// Emit the lvalue for a shared reduction item (delegates to the generic
// OpenMP shared-lvalue emission).
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
903 
904 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
905                                             const Expr *E) {
906   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
907     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
908   return LValue();
909 }
910 
// Initialize the private copy of an array-typed reduction item, element by
// element, choosing between the UDR initializer and the private VarDecl's
// own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR path when a declare-reduction decl is present and either it
  // has an explicit initializer or the private copy has none of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}
927 
928 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
929                                    ArrayRef<const Expr *> Privates,
930                                    ArrayRef<const Expr *> ReductionOps) {
931   ClausesData.reserve(Shareds.size());
932   SharedAddresses.reserve(Shareds.size());
933   Sizes.reserve(Shareds.size());
934   BaseDecls.reserve(Shareds.size());
935   auto IPriv = Privates.begin();
936   auto IRed = ReductionOps.begin();
937   for (const Expr *Ref : Shareds) {
938     ClausesData.emplace_back(Ref, *IPriv, *IRed);
939     std::advance(IPriv, 1);
940     std::advance(IRed, 1);
941   }
942 }
943 
944 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
945   assert(SharedAddresses.size() == N &&
946          "Number of generated lvalues must be exactly N.");
947   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
948   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
949   SharedAddresses.emplace_back(First, Second);
950 }
951 
// Compute and record {size-in-chars, number-of-elements} for reduction item
// N. For variably modified (VLA) types the element count is also mapped onto
// the VLA size expression so later emission can use it.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the size is fully known from the type; no element
    // count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, derived from the recorded bound lvalues;
    // byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: take the byte size from the type and divide back to get the
    // element count.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA's size expression so
  // EmitVariablyModifiedType can evaluate the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
989 
// Re-emit the variably modified private type of item N using an externally
// provided element count \p Size (e.g. one loaded back in a different
// function). No-op for constant-size types, where Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind Size to the VLA's size expression while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1008 
// Initialize the private copy of reduction item N. Dispatches between
// element-wise aggregate init, the UDR initializer, a caller-provided
// default init, and the private VarDecl's own initializer.
// \param DefaultInit Callback tried first for scalar items; returning true
// means it handled the initialization.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the memory representation of their declared types
  // before any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR: use its initializer (or zero-init when it has none
    // and the private copy has no init of its own).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1039 
1040 bool ReductionCodeGen::needCleanups(unsigned N) {
1041   const auto *PrivateVD =
1042       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1043   QualType PrivateType = PrivateVD->getType();
1044   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1045   return DTorKind != QualType::DK_none;
1046 }
1047 
1048 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1049                                     Address PrivateAddr) {
1050   const auto *PrivateVD =
1051       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1052   QualType PrivateType = PrivateVD->getType();
1053   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1054   if (needCleanups(N)) {
1055     PrivateAddr = CGF.Builder.CreateElementBitCast(
1056         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1057     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1058   }
1059 }
1060 
/// Dereference the pointer/reference chain of \p BaseLV (typed \p BaseTy)
/// until the pointee type matches \p ElTy, then return an lvalue for that
/// innermost object cast to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel one level of indirection per iteration, loading through pointers
  // and references alike, stopping once BaseTy equals ElTy.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference: rebuild an lvalue at the current address, then load
      // through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1080 
/// Rebuild, around raw pointer \p Addr, the same pointer/reference nesting
/// that \p BaseTy has over \p ElTy, using a chain of stack temporaries so the
/// result can be used wherever an lvalue of the original base type is
/// expected. Returns the outermost temporary, or \p Addr wrapped at
/// \p BaseLVAlignment when no indirection was needed.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // One temporary per level of indirection; each temporary stores the
  // address of the next inner one. MostTopTmp remembers the outermost.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    // The innermost temporary's element type dictates the pointer type
    // Addr must be cast to before the final store.
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1108 
1109 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1110   const VarDecl *OrigVD = nullptr;
1111   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1112     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1113     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1114       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1115     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1116       Base = TempASE->getBase()->IgnoreParenImpCasts();
1117     DE = cast<DeclRefExpr>(Base);
1118     OrigVD = cast<VarDecl>(DE->getDecl());
1119   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1120     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1121     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1122       Base = TempASE->getBase()->IgnoreParenImpCasts();
1123     DE = cast<DeclRefExpr>(Base);
1124     OrigVD = cast<VarDecl>(DE->getDecl());
1125   }
1126   return OrigVD;
1127 }
1128 
// For array-section/subscript reduction items, shift the private address so
// it corresponds to the base variable rather than the section start: the
// private buffer covers only the section, so indexing with the original
// expressions needs the address rebased by (section start - base start).
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Drill through any pointer/reference indirection down to the element
    // type of the shared item.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Adjustment = base start - section start (in elements); applying it to
    // the private pointer yields a pointer that indexes like the original.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Re-wrap the adjusted pointer in the original base's indirection shape.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no rebasing needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1154 
1155 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1156   const OMPDeclareReductionDecl *DRD =
1157       getReductionInit(ClausesData[N].ReductionOp);
1158   return DRD && DRD->getInitializer();
1159 }
1160 
// The thread-id variable is passed to outlined regions as a kmp_int32*;
// load through that pointer to get an lvalue for the id itself.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1166 
// Emit the region body via the stored CodeGen callback, wrapped in a
// terminate scope so exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1179 
// For tasks the thread-id variable is a plain local (not a pointer as in the
// base-class version), so build the lvalue directly from its address.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1186 
1187 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1188                                        QualType FieldTy) {
1189   auto *Field = FieldDecl::Create(
1190       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1191       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1192       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1193   Field->setAccess(AS_public);
1194   DC->addDecl(Field);
1195   return Field;
1196 }
1197 
// Construct the runtime helper: builds the implicit 'ident_t' record used as
// the source-location argument of libomp entry points, the critical-name
// array type, and loads any offloading metadata already present.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  // ident_t layout: four reserved/flag int32 fields plus a char* describing
  // the source location (see field comments below).
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name is an array of 8 int32s.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1223 
// Drop cached internal runtime variables at the end of module codegen.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}
1227 
1228 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1229   SmallString<128> Buffer;
1230   llvm::raw_svector_ostream OS(Buffer);
1231   StringRef Sep = FirstSeparator;
1232   for (StringRef Part : Parts) {
1233     OS << Sep << Part;
1234     Sep = Separator;
1235   }
1236   return OS.str();
1237 }
1238 
/// Emit the outlined combiner or initializer function for a user-defined
/// reduction: void .omp_combiner.(Ty *out, Ty *in). \p In and \p Out are the
/// UDR's omp_in/omp_out (or omp_orig/omp_priv) variables; they are remapped
/// onto the two pointer parameters. \p CombinerInitializer is the expression
/// to emit as the body (may be null for a direct-init initializer).
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Force inlining: these helpers are tiny and called from generated code.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For a direct-init initializer, emit omp_priv's own initializer into the
  // out parameter before (or instead of) the initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1293 
// Emit (once per decl) the combiner — and, when declared, the initializer —
// functions for a user-defined reduction and cache them in UDRMap. When
// called from within a function, also record the decl in FunctionUDRMap so
// it can be associated with that function.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this decl; nothing to do.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For call-style init the expression is emitted as the function body;
    // for direct-init the body comes from omp_priv's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1319 
1320 std::pair<llvm::Function *, llvm::Function *>
1321 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1322   auto I = UDRMap.find(D);
1323   if (I != UDRMap.end())
1324     return I->second;
1325   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1326   return UDRMap.lookup(D);
1327 }
1328 
// Outline the captured statement of a parallel/teams directive into a helper
// function. HasCancel is derived from the concrete directive kind so the
// outlined region knows whether 'cancel parallel' may branch out of it.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Each parallel-containing directive type exposes hasCancel() separately;
  // probe the concrete kinds that can carry a cancel region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  // Install the region info for the duration of the outlining so the emitted
  // function body uses the OpenMP-region code paths.
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
1358 
// Outline the 'parallel' captured region of \p D into a helper function.
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1366 
// Outline the 'teams' captured region of \p D into a helper function.
llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1374 
// Outline the captured region of a task/taskloop directive. For untied tasks
// an action is attached that re-enqueues the task (__kmpc_omp_task) at each
// part boundary, and the resulting number of task parts is reported back
// through \p NumberOfParts.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen for re-scheduling an untied task: call
  // __kmpc_omp_task(loc, tid, task_t*) with the task object loaded from
  // TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Pick the right captured statement depending on whether this is a
  // taskloop or a plain task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' directives can carry a cancel region here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks are split into parts; report how many were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1411 
1412 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1413                              const RecordDecl *RD, const CGRecordLayout &RL,
1414                              ArrayRef<llvm::Constant *> Data) {
1415   llvm::StructType *StructTy = RL.getLLVMType();
1416   unsigned PrevIdx = 0;
1417   ConstantInitBuilder CIBuilder(CGM);
1418   auto DI = Data.begin();
1419   for (const FieldDecl *FD : RD->fields()) {
1420     unsigned Idx = RL.getLLVMFieldNo(FD);
1421     // Fill the alignment.
1422     for (unsigned I = PrevIdx; I < Idx; ++I)
1423       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1424     PrevIdx = Idx + 1;
1425     Fields.add(*DI);
1426     ++DI;
1427   }
1428 }
1429 
1430 template <class... As>
1431 static llvm::GlobalVariable *
1432 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1433                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1434                    As &&... Args) {
1435   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1436   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1437   ConstantInitBuilder CIBuilder(CGM);
1438   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1439   buildStructValue(Fields, CGM, RD, RL, Data);
1440   return Fields.finishAndCreateGlobal(
1441       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1442       std::forward<As>(Args)...);
1443 }
1444 
1445 template <typename T>
1446 static void
1447 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1448                                          ArrayRef<llvm::Constant *> Data,
1449                                          T &Parent) {
1450   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1451   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1452   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1453   buildStructValue(Fields, CGM, RD, RL, Data);
1454   Fields.finishAndAddTo(Parent);
1455 }
1456 
// Return the address of a default ident_t global for the given flags,
// creating and caching it (keyed by Flags in OpenMPDefaultLocMap) on first
// use.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field values for the ident_t initializer: only the second i32 carries
    // Flags; the other i32 fields are zero, and the last field is the shared
    // default psource string.
    llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                              llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, /*IsConstant=*/false, Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // Address identity of the location is irrelevant to the runtime, so allow
    // the globals to be merged.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1487 
1488 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1489                                              bool AtCurrentPoint) {
1490   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1491   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1492 
1493   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1494   if (AtCurrentPoint) {
1495     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1496         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1497   } else {
1498     Elem.second.ServiceInsertPt =
1499         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1500     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1501   }
1502 }
1503 
1504 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1505   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1506   if (Elem.second.ServiceInsertPt) {
1507     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1508     Elem.second.ServiceInsertPt = nullptr;
1509     Ptr->eraseFromParent();
1510   }
1511 }
1512 
// Return a pointer to an ident_t describing \p Loc with \p Flags, suitable
// for passing to a __kmpc_* runtime entry.  Without debug info this is a
// shared default global; with debug info it is a per-function local whose
// psource field is updated to ";file;function;line;column;;" for this Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the local copy from the default location at the function's
    // service insert point so it dominates all uses.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Build (and cache per raw source location) the ";file;func;line;col;;"
  // string global for this Loc.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1573 
// Return the OpenMP global thread id (kmp_int32) for the current function,
// preferring (in order): a value cached in OpenMPLocThreadIDMap, the thread
// id parameter of an enclosing outlined region, and finally an emitted call
// to __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  // The call is emitted at the service insert point (function entry) so the
  // cached value dominates all later uses.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1624 
1625 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1626   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1627   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1628     clearLocThreadIdInsertPt(CGF);
1629     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1630   }
1631   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1632     for(auto *D : FunctionUDRMap[CGF.CurFn])
1633       UDRMap.erase(D);
1634     FunctionUDRMap.erase(CGF.CurFn);
1635   }
1636 }
1637 
1638 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1639   return IdentTy->getPointerTo();
1640 }
1641 
1642 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1643   if (!Kmpc_MicroTy) {
1644     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1645     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1646                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1647     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1648   }
1649   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1650 }
1651 
1652 llvm::Constant *
1653 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1654   llvm::Constant *RTLFn = nullptr;
1655   switch (static_cast<OpenMPRTLFunction>(Function)) {
1656   case OMPRTL__kmpc_fork_call: {
1657     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1658     // microtask, ...);
1659     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1660                                 getKmpc_MicroPointerTy()};
1661     auto *FnTy =
1662         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1663     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1664     break;
1665   }
1666   case OMPRTL__kmpc_global_thread_num: {
1667     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1668     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1669     auto *FnTy =
1670         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1671     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1672     break;
1673   }
1674   case OMPRTL__kmpc_threadprivate_cached: {
1675     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1676     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1677     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1678                                 CGM.VoidPtrTy, CGM.SizeTy,
1679                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1680     auto *FnTy =
1681         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1682     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1683     break;
1684   }
1685   case OMPRTL__kmpc_critical: {
1686     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1687     // kmp_critical_name *crit);
1688     llvm::Type *TypeParams[] = {
1689         getIdentTyPointerTy(), CGM.Int32Ty,
1690         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1691     auto *FnTy =
1692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1693     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1694     break;
1695   }
1696   case OMPRTL__kmpc_critical_with_hint: {
1697     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1698     // kmp_critical_name *crit, uintptr_t hint);
1699     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1700                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1701                                 CGM.IntPtrTy};
1702     auto *FnTy =
1703         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1704     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1705     break;
1706   }
1707   case OMPRTL__kmpc_threadprivate_register: {
1708     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1709     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1710     // typedef void *(*kmpc_ctor)(void *);
1711     auto *KmpcCtorTy =
1712         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1713                                 /*isVarArg*/ false)->getPointerTo();
1714     // typedef void *(*kmpc_cctor)(void *, void *);
1715     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1716     auto *KmpcCopyCtorTy =
1717         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1718                                 /*isVarArg*/ false)
1719             ->getPointerTo();
1720     // typedef void (*kmpc_dtor)(void *);
1721     auto *KmpcDtorTy =
1722         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1723             ->getPointerTo();
1724     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1725                               KmpcCopyCtorTy, KmpcDtorTy};
1726     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1727                                         /*isVarArg*/ false);
1728     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1729     break;
1730   }
1731   case OMPRTL__kmpc_end_critical: {
1732     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1733     // kmp_critical_name *crit);
1734     llvm::Type *TypeParams[] = {
1735         getIdentTyPointerTy(), CGM.Int32Ty,
1736         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1737     auto *FnTy =
1738         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1739     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1740     break;
1741   }
1742   case OMPRTL__kmpc_cancel_barrier: {
1743     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1744     // global_tid);
1745     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1746     auto *FnTy =
1747         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1748     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1749     break;
1750   }
1751   case OMPRTL__kmpc_barrier: {
1752     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1753     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1754     auto *FnTy =
1755         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1756     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1757     break;
1758   }
1759   case OMPRTL__kmpc_for_static_fini: {
1760     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1761     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1762     auto *FnTy =
1763         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1764     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1765     break;
1766   }
1767   case OMPRTL__kmpc_push_num_threads: {
1768     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1769     // kmp_int32 num_threads)
1770     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1771                                 CGM.Int32Ty};
1772     auto *FnTy =
1773         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1774     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1775     break;
1776   }
1777   case OMPRTL__kmpc_serialized_parallel: {
1778     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1779     // global_tid);
1780     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1781     auto *FnTy =
1782         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1783     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1784     break;
1785   }
1786   case OMPRTL__kmpc_end_serialized_parallel: {
1787     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1788     // global_tid);
1789     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1790     auto *FnTy =
1791         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1792     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1793     break;
1794   }
1795   case OMPRTL__kmpc_flush: {
1796     // Build void __kmpc_flush(ident_t *loc);
1797     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1798     auto *FnTy =
1799         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1800     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1801     break;
1802   }
1803   case OMPRTL__kmpc_master: {
1804     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1805     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1806     auto *FnTy =
1807         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1808     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1809     break;
1810   }
1811   case OMPRTL__kmpc_end_master: {
1812     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1813     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814     auto *FnTy =
1815         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1816     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1817     break;
1818   }
1819   case OMPRTL__kmpc_omp_taskyield: {
1820     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1821     // int end_part);
1822     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1823     auto *FnTy =
1824         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1825     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1826     break;
1827   }
1828   case OMPRTL__kmpc_single: {
1829     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1830     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1831     auto *FnTy =
1832         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1833     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1834     break;
1835   }
1836   case OMPRTL__kmpc_end_single: {
1837     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1838     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1839     auto *FnTy =
1840         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1841     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1842     break;
1843   }
1844   case OMPRTL__kmpc_omp_task_alloc: {
1845     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1846     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1847     // kmp_routine_entry_t *task_entry);
1848     assert(KmpRoutineEntryPtrTy != nullptr &&
1849            "Type kmp_routine_entry_t must be created.");
1850     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1851                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1852     // Return void * and then cast to particular kmp_task_t type.
1853     auto *FnTy =
1854         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1855     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1856     break;
1857   }
1858   case OMPRTL__kmpc_omp_task: {
1859     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1860     // *new_task);
1861     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1862                                 CGM.VoidPtrTy};
1863     auto *FnTy =
1864         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1865     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1866     break;
1867   }
1868   case OMPRTL__kmpc_copyprivate: {
1869     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1870     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1871     // kmp_int32 didit);
1872     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1873     auto *CpyFnTy =
1874         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1875     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1876                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1877                                 CGM.Int32Ty};
1878     auto *FnTy =
1879         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1880     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1881     break;
1882   }
1883   case OMPRTL__kmpc_reduce: {
1884     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1885     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1886     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1887     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1888     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1889                                                /*isVarArg=*/false);
1890     llvm::Type *TypeParams[] = {
1891         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1892         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1893         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1894     auto *FnTy =
1895         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1896     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1897     break;
1898   }
1899   case OMPRTL__kmpc_reduce_nowait: {
1900     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1901     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1902     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1903     // *lck);
1904     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1905     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1906                                                /*isVarArg=*/false);
1907     llvm::Type *TypeParams[] = {
1908         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1909         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1910         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1911     auto *FnTy =
1912         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1913     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1914     break;
1915   }
1916   case OMPRTL__kmpc_end_reduce: {
1917     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1918     // kmp_critical_name *lck);
1919     llvm::Type *TypeParams[] = {
1920         getIdentTyPointerTy(), CGM.Int32Ty,
1921         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1922     auto *FnTy =
1923         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1924     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1925     break;
1926   }
1927   case OMPRTL__kmpc_end_reduce_nowait: {
1928     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1929     // kmp_critical_name *lck);
1930     llvm::Type *TypeParams[] = {
1931         getIdentTyPointerTy(), CGM.Int32Ty,
1932         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1933     auto *FnTy =
1934         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1935     RTLFn =
1936         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1937     break;
1938   }
1939   case OMPRTL__kmpc_omp_task_begin_if0: {
1940     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1941     // *new_task);
1942     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1943                                 CGM.VoidPtrTy};
1944     auto *FnTy =
1945         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1946     RTLFn =
1947         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1948     break;
1949   }
1950   case OMPRTL__kmpc_omp_task_complete_if0: {
1951     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1952     // *new_task);
1953     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1954                                 CGM.VoidPtrTy};
1955     auto *FnTy =
1956         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1957     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1958                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1959     break;
1960   }
1961   case OMPRTL__kmpc_ordered: {
1962     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1963     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1964     auto *FnTy =
1965         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1966     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1967     break;
1968   }
1969   case OMPRTL__kmpc_end_ordered: {
1970     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1971     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1972     auto *FnTy =
1973         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1974     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1975     break;
1976   }
1977   case OMPRTL__kmpc_omp_taskwait: {
1978     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1979     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1980     auto *FnTy =
1981         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1982     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1983     break;
1984   }
1985   case OMPRTL__kmpc_taskgroup: {
1986     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1987     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1991     break;
1992   }
1993   case OMPRTL__kmpc_end_taskgroup: {
1994     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1995     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1996     auto *FnTy =
1997         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1998     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1999     break;
2000   }
2001   case OMPRTL__kmpc_push_proc_bind: {
2002     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2003     // int proc_bind)
2004     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2008     break;
2009   }
2010   case OMPRTL__kmpc_omp_task_with_deps: {
2011     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2012     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2013     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2014     llvm::Type *TypeParams[] = {
2015         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2016         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2017     auto *FnTy =
2018         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2019     RTLFn =
2020         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2021     break;
2022   }
2023   case OMPRTL__kmpc_omp_wait_deps: {
2024     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2025     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2026     // kmp_depend_info_t *noalias_dep_list);
2027     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2028                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2029                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2030     auto *FnTy =
2031         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2032     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2033     break;
2034   }
2035   case OMPRTL__kmpc_cancellationpoint: {
2036     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2037     // global_tid, kmp_int32 cncl_kind)
2038     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2039     auto *FnTy =
2040         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2041     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2042     break;
2043   }
2044   case OMPRTL__kmpc_cancel: {
2045     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2046     // kmp_int32 cncl_kind)
2047     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2048     auto *FnTy =
2049         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2050     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2051     break;
2052   }
2053   case OMPRTL__kmpc_push_num_teams: {
2054     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2055     // kmp_int32 num_teams, kmp_int32 num_threads)
2056     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2057         CGM.Int32Ty};
2058     auto *FnTy =
2059         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2060     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2061     break;
2062   }
2063   case OMPRTL__kmpc_fork_teams: {
2064     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2065     // microtask, ...);
2066     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2067                                 getKmpc_MicroPointerTy()};
2068     auto *FnTy =
2069         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2070     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2071     break;
2072   }
2073   case OMPRTL__kmpc_taskloop: {
2074     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2075     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2076     // sched, kmp_uint64 grainsize, void *task_dup);
2077     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2078                                 CGM.IntTy,
2079                                 CGM.VoidPtrTy,
2080                                 CGM.IntTy,
2081                                 CGM.Int64Ty->getPointerTo(),
2082                                 CGM.Int64Ty->getPointerTo(),
2083                                 CGM.Int64Ty,
2084                                 CGM.IntTy,
2085                                 CGM.IntTy,
2086                                 CGM.Int64Ty,
2087                                 CGM.VoidPtrTy};
2088     auto *FnTy =
2089         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2090     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2091     break;
2092   }
2093   case OMPRTL__kmpc_doacross_init: {
2094     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2095     // num_dims, struct kmp_dim *dims);
2096     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2097                                 CGM.Int32Ty,
2098                                 CGM.Int32Ty,
2099                                 CGM.VoidPtrTy};
2100     auto *FnTy =
2101         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2102     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2103     break;
2104   }
2105   case OMPRTL__kmpc_doacross_fini: {
2106     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2107     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2108     auto *FnTy =
2109         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2110     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2111     break;
2112   }
2113   case OMPRTL__kmpc_doacross_post: {
2114     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2115     // *vec);
2116     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2117                                 CGM.Int64Ty->getPointerTo()};
2118     auto *FnTy =
2119         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2120     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2121     break;
2122   }
2123   case OMPRTL__kmpc_doacross_wait: {
2124     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2125     // *vec);
2126     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2127                                 CGM.Int64Ty->getPointerTo()};
2128     auto *FnTy =
2129         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2130     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2131     break;
2132   }
2133   case OMPRTL__kmpc_task_reduction_init: {
2134     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2135     // *data);
2136     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2137     auto *FnTy =
2138         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2139     RTLFn =
2140         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2141     break;
2142   }
2143   case OMPRTL__kmpc_task_reduction_get_th_data: {
2144     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2145     // *d);
2146     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2147     auto *FnTy =
2148         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2149     RTLFn = CGM.CreateRuntimeFunction(
2150         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2151     break;
2152   }
2153   case OMPRTL__tgt_target: {
2154     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2155     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2156     // *arg_types);
2157     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2158                                 CGM.VoidPtrTy,
2159                                 CGM.Int32Ty,
2160                                 CGM.VoidPtrPtrTy,
2161                                 CGM.VoidPtrPtrTy,
2162                                 CGM.SizeTy->getPointerTo(),
2163                                 CGM.Int64Ty->getPointerTo()};
2164     auto *FnTy =
2165         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2166     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2167     break;
2168   }
2169   case OMPRTL__tgt_target_nowait: {
2170     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2171     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2172     // int64_t *arg_types);
2173     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2174                                 CGM.VoidPtrTy,
2175                                 CGM.Int32Ty,
2176                                 CGM.VoidPtrPtrTy,
2177                                 CGM.VoidPtrPtrTy,
2178                                 CGM.SizeTy->getPointerTo(),
2179                                 CGM.Int64Ty->getPointerTo()};
2180     auto *FnTy =
2181         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2182     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2183     break;
2184   }
2185   case OMPRTL__tgt_target_teams: {
2186     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2187     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2188     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2189     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2190                                 CGM.VoidPtrTy,
2191                                 CGM.Int32Ty,
2192                                 CGM.VoidPtrPtrTy,
2193                                 CGM.VoidPtrPtrTy,
2194                                 CGM.SizeTy->getPointerTo(),
2195                                 CGM.Int64Ty->getPointerTo(),
2196                                 CGM.Int32Ty,
2197                                 CGM.Int32Ty};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2201     break;
2202   }
2203   case OMPRTL__tgt_target_teams_nowait: {
2204     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2205     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2206     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2207     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2208                                 CGM.VoidPtrTy,
2209                                 CGM.Int32Ty,
2210                                 CGM.VoidPtrPtrTy,
2211                                 CGM.VoidPtrPtrTy,
2212                                 CGM.SizeTy->getPointerTo(),
2213                                 CGM.Int64Ty->getPointerTo(),
2214                                 CGM.Int32Ty,
2215                                 CGM.Int32Ty};
2216     auto *FnTy =
2217         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2218     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2219     break;
2220   }
2221   case OMPRTL__tgt_register_lib: {
2222     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2223     QualType ParamTy =
2224         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2225     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2226     auto *FnTy =
2227         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2228     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2229     break;
2230   }
2231   case OMPRTL__tgt_unregister_lib: {
2232     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2233     QualType ParamTy =
2234         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2235     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2236     auto *FnTy =
2237         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2238     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2239     break;
2240   }
2241   case OMPRTL__tgt_target_data_begin: {
2242     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2243     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2244     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2245                                 CGM.Int32Ty,
2246                                 CGM.VoidPtrPtrTy,
2247                                 CGM.VoidPtrPtrTy,
2248                                 CGM.SizeTy->getPointerTo(),
2249                                 CGM.Int64Ty->getPointerTo()};
2250     auto *FnTy =
2251         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2252     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2253     break;
2254   }
2255   case OMPRTL__tgt_target_data_begin_nowait: {
2256     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2257     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2258     // *arg_types);
2259     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2260                                 CGM.Int32Ty,
2261                                 CGM.VoidPtrPtrTy,
2262                                 CGM.VoidPtrPtrTy,
2263                                 CGM.SizeTy->getPointerTo(),
2264                                 CGM.Int64Ty->getPointerTo()};
2265     auto *FnTy =
2266         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2267     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2268     break;
2269   }
2270   case OMPRTL__tgt_target_data_end: {
2271     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2272     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2273     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2274                                 CGM.Int32Ty,
2275                                 CGM.VoidPtrPtrTy,
2276                                 CGM.VoidPtrPtrTy,
2277                                 CGM.SizeTy->getPointerTo(),
2278                                 CGM.Int64Ty->getPointerTo()};
2279     auto *FnTy =
2280         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2281     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2282     break;
2283   }
2284   case OMPRTL__tgt_target_data_end_nowait: {
2285     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2286     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2287     // *arg_types);
2288     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2289                                 CGM.Int32Ty,
2290                                 CGM.VoidPtrPtrTy,
2291                                 CGM.VoidPtrPtrTy,
2292                                 CGM.SizeTy->getPointerTo(),
2293                                 CGM.Int64Ty->getPointerTo()};
2294     auto *FnTy =
2295         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2296     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2297     break;
2298   }
2299   case OMPRTL__tgt_target_data_update: {
2300     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2301     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2302     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2303                                 CGM.Int32Ty,
2304                                 CGM.VoidPtrPtrTy,
2305                                 CGM.VoidPtrPtrTy,
2306                                 CGM.SizeTy->getPointerTo(),
2307                                 CGM.Int64Ty->getPointerTo()};
2308     auto *FnTy =
2309         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2310     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2311     break;
2312   }
2313   case OMPRTL__tgt_target_data_update_nowait: {
2314     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2315     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2316     // *arg_types);
2317     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2318                                 CGM.Int32Ty,
2319                                 CGM.VoidPtrPtrTy,
2320                                 CGM.VoidPtrPtrTy,
2321                                 CGM.SizeTy->getPointerTo(),
2322                                 CGM.Int64Ty->getPointerTo()};
2323     auto *FnTy =
2324         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2325     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2326     break;
2327   }
2328   }
2329   assert(RTLFn && "Unable to find OpenMP runtime function");
2330   return RTLFn;
2331 }
2332 
2333 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2334                                                              bool IVSigned) {
2335   assert((IVSize == 32 || IVSize == 64) &&
2336          "IV size is not compatible with the omp runtime");
2337   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2338                                             : "__kmpc_for_static_init_4u")
2339                                 : (IVSigned ? "__kmpc_for_static_init_8"
2340                                             : "__kmpc_for_static_init_8u");
2341   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2342   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2343   llvm::Type *TypeParams[] = {
2344     getIdentTyPointerTy(),                     // loc
2345     CGM.Int32Ty,                               // tid
2346     CGM.Int32Ty,                               // schedtype
2347     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2348     PtrTy,                                     // p_lower
2349     PtrTy,                                     // p_upper
2350     PtrTy,                                     // p_stride
2351     ITy,                                       // incr
2352     ITy                                        // chunk
2353   };
2354   auto *FnTy =
2355       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2356   return CGM.CreateRuntimeFunction(FnTy, Name);
2357 }
2358 
2359 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2360                                                             bool IVSigned) {
2361   assert((IVSize == 32 || IVSize == 64) &&
2362          "IV size is not compatible with the omp runtime");
2363   StringRef Name =
2364       IVSize == 32
2365           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2366           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2367   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2368   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2369                                CGM.Int32Ty,           // tid
2370                                CGM.Int32Ty,           // schedtype
2371                                ITy,                   // lower
2372                                ITy,                   // upper
2373                                ITy,                   // stride
2374                                ITy                    // chunk
2375   };
2376   auto *FnTy =
2377       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2378   return CGM.CreateRuntimeFunction(FnTy, Name);
2379 }
2380 
2381 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2382                                                             bool IVSigned) {
2383   assert((IVSize == 32 || IVSize == 64) &&
2384          "IV size is not compatible with the omp runtime");
2385   StringRef Name =
2386       IVSize == 32
2387           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2388           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2389   llvm::Type *TypeParams[] = {
2390       getIdentTyPointerTy(), // loc
2391       CGM.Int32Ty,           // tid
2392   };
2393   auto *FnTy =
2394       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2395   return CGM.CreateRuntimeFunction(FnTy, Name);
2396 }
2397 
2398 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2399                                                             bool IVSigned) {
2400   assert((IVSize == 32 || IVSize == 64) &&
2401          "IV size is not compatible with the omp runtime");
2402   StringRef Name =
2403       IVSize == 32
2404           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2405           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2406   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2407   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2408   llvm::Type *TypeParams[] = {
2409     getIdentTyPointerTy(),                     // loc
2410     CGM.Int32Ty,                               // tid
2411     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2412     PtrTy,                                     // p_lower
2413     PtrTy,                                     // p_upper
2414     PtrTy                                      // p_stride
2415   };
2416   auto *FnTy =
2417       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2418   return CGM.CreateRuntimeFunction(FnTy, Name);
2419 }
2420 
2421 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2422   if (CGM.getLangOpts().OpenMPSimd)
2423     return Address::invalid();
2424   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2425       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2426   if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2427     SmallString<64> PtrName;
2428     {
2429       llvm::raw_svector_ostream OS(PtrName);
2430       OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2431     }
2432     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2433     if (!Ptr) {
2434       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2435       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2436                                         PtrName);
2437       if (!CGM.getLangOpts().OpenMPIsDevice) {
2438         auto *GV = cast<llvm::GlobalVariable>(Ptr);
2439         GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2440         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2441       }
2442       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2443       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2444     }
2445     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2446   }
2447   return Address::invalid();
2448 }
2449 
2450 llvm::Constant *
2451 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2452   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2453          !CGM.getContext().getTargetInfo().isTLSSupported());
2454   // Lookup the entry, lazily creating it if necessary.
2455   std::string Suffix = getName({"cache", ""});
2456   return getOrCreateInternalVariable(
2457       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2458 }
2459 
2460 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2461                                                 const VarDecl *VD,
2462                                                 Address VDAddr,
2463                                                 SourceLocation Loc) {
2464   if (CGM.getLangOpts().OpenMPUseTLS &&
2465       CGM.getContext().getTargetInfo().isTLSSupported())
2466     return VDAddr;
2467 
2468   llvm::Type *VarTy = VDAddr.getElementType();
2469   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2470                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2471                                                        CGM.Int8PtrTy),
2472                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2473                          getOrCreateThreadPrivateCache(VD)};
2474   return Address(CGF.EmitRuntimeCall(
2475       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2476                  VDAddr.getAlignment());
2477 }
2478 
2479 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2480     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2481     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2482   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2483   // library.
2484   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2485   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2486                       OMPLoc);
2487   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2488   // to register constructor/destructor for variable.
2489   llvm::Value *Args[] = {
2490       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2491       Ctor, CopyCtor, Dtor};
2492   CGF.EmitRuntimeCall(
2493       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2494 }
2495 
2496 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2497     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2498     bool PerformInit, CodeGenFunction *CGF) {
2499   if (CGM.getLangOpts().OpenMPUseTLS &&
2500       CGM.getContext().getTargetInfo().isTLSSupported())
2501     return nullptr;
2502 
2503   VD = VD->getDefinition(CGM.getContext());
2504   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2505     ThreadPrivateWithDefinition.insert(VD);
2506     QualType ASTTy = VD->getType();
2507 
2508     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2509     const Expr *Init = VD->getAnyInitializer();
2510     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2511       // Generate function that re-emits the declaration's initializer into the
2512       // threadprivate copy of the variable VD
2513       CodeGenFunction CtorCGF(CGM);
2514       FunctionArgList Args;
2515       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2516                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2517                             ImplicitParamDecl::Other);
2518       Args.push_back(&Dst);
2519 
2520       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2521           CGM.getContext().VoidPtrTy, Args);
2522       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2523       std::string Name = getName({"__kmpc_global_ctor_", ""});
2524       llvm::Function *Fn =
2525           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2526       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2527                             Args, Loc, Loc);
2528       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2529           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2530           CGM.getContext().VoidPtrTy, Dst.getLocation());
2531       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2532       Arg = CtorCGF.Builder.CreateElementBitCast(
2533           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2534       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2535                                /*IsInitializer=*/true);
2536       ArgVal = CtorCGF.EmitLoadOfScalar(
2537           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2538           CGM.getContext().VoidPtrTy, Dst.getLocation());
2539       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2540       CtorCGF.FinishFunction();
2541       Ctor = Fn;
2542     }
2543     if (VD->getType().isDestructedType() != QualType::DK_none) {
2544       // Generate function that emits destructor call for the threadprivate copy
2545       // of the variable VD
2546       CodeGenFunction DtorCGF(CGM);
2547       FunctionArgList Args;
2548       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2549                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2550                             ImplicitParamDecl::Other);
2551       Args.push_back(&Dst);
2552 
2553       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2554           CGM.getContext().VoidTy, Args);
2555       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2556       std::string Name = getName({"__kmpc_global_dtor_", ""});
2557       llvm::Function *Fn =
2558           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2559       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2560       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2561                             Loc, Loc);
2562       // Create a scope with an artificial location for the body of this function.
2563       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2564       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2565           DtorCGF.GetAddrOfLocalVar(&Dst),
2566           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2567       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2568                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2569                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2570       DtorCGF.FinishFunction();
2571       Dtor = Fn;
2572     }
2573     // Do not emit init function if it is not required.
2574     if (!Ctor && !Dtor)
2575       return nullptr;
2576 
2577     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2578     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2579                                                /*isVarArg=*/false)
2580                            ->getPointerTo();
2581     // Copying constructor for the threadprivate variable.
2582     // Must be NULL - reserved by runtime, but currently it requires that this
2583     // parameter is always NULL. Otherwise it fires assertion.
2584     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2585     if (Ctor == nullptr) {
2586       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2587                                              /*isVarArg=*/false)
2588                          ->getPointerTo();
2589       Ctor = llvm::Constant::getNullValue(CtorTy);
2590     }
2591     if (Dtor == nullptr) {
2592       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2593                                              /*isVarArg=*/false)
2594                          ->getPointerTo();
2595       Dtor = llvm::Constant::getNullValue(DtorTy);
2596     }
2597     if (!CGF) {
2598       auto *InitFunctionTy =
2599           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2600       std::string Name = getName({"__omp_threadprivate_init_", ""});
2601       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2602           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2603       CodeGenFunction InitCGF(CGM);
2604       FunctionArgList ArgList;
2605       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2606                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2607                             Loc, Loc);
2608       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2609       InitCGF.FinishFunction();
2610       return InitFunction;
2611     }
2612     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2613   }
2614   return nullptr;
2615 }
2616 
2617 /// Obtain information that uniquely identifies a target entry. This
2618 /// consists of the file and device IDs as well as line number associated with
2619 /// the relevant entry source location.
2620 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2621                                      unsigned &DeviceID, unsigned &FileID,
2622                                      unsigned &LineNum) {
2623   SourceManager &SM = C.getSourceManager();
2624 
2625   // The loc should be always valid and have a file ID (the user cannot use
2626   // #pragma directives in macros)
2627 
2628   assert(Loc.isValid() && "Source location is expected to be always valid.");
2629 
2630   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2631   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2632 
2633   llvm::sys::fs::UniqueID ID;
2634   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2635     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2636         << PLoc.getFilename() << EC.message();
2637 
2638   DeviceID = ID.getDevice();
2639   FileID = ID.getFile();
2640   LineNum = PLoc.getLine();
2641 }
2642 
/// Emit the offloading entries (ctor/dtor stubs) required for a variable
/// marked 'declare target' with the 'to' map type, and report whether the
/// caller should suppress its own emission of the definition.
///
/// \param VD The declare-target variable.
/// \param Addr The global providing the variable's storage.
/// \param PerformInit Whether dynamic initialization must be emitted.
/// \return The value of LangOpts.OpenMPIsDevice (true on the device side).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to do for variables that are not declare-target or are mapped
  // with the 'link' clause.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return CGM.getLangOpts().OpenMPIsDevice;
  // Only process each definition once (DeclareTargetWithDefinition acts as
  // the "already emitted" set).
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(VD).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Clear the debug location while starting the function, then switch to
      // an artificial one for the body.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Prevent the ctor from being discarded; nothing else references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: emit a private placeholder global that only serves as the
      // address/ID of the offload entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Prevent the dtor from being discarded; nothing else references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: placeholder global acting as the entry's address/ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2750 
2751 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2752                                                           QualType VarType,
2753                                                           StringRef Name) {
2754   std::string Suffix = getName({"artificial", ""});
2755   std::string CacheSuffix = getName({"cache", ""});
2756   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2757   llvm::Value *GAddr =
2758       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2759   llvm::Value *Args[] = {
2760       emitUpdateLocation(CGF, SourceLocation()),
2761       getThreadID(CGF, SourceLocation()),
2762       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2763       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2764                                 /*IsSigned=*/false),
2765       getOrCreateInternalVariable(
2766           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2767   return Address(
2768       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2769           CGF.EmitRuntimeCall(
2770               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2771           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2772       CGM.getPointerAlign());
2773 }
2774 
2775 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2776                                       const RegionCodeGenTy &ThenGen,
2777                                       const RegionCodeGenTy &ElseGen) {
2778   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2779 
2780   // If the condition constant folds and can be elided, try to avoid emitting
2781   // the condition and the dead arm of the if/else.
2782   bool CondConstant;
2783   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2784     if (CondConstant)
2785       ThenGen(CGF);
2786     else
2787       ElseGen(CGF);
2788     return;
2789   }
2790 
2791   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2792   // emit the conditional branch.
2793   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2794   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2795   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2796   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2797 
2798   // Emit the 'then' code.
2799   CGF.EmitBlock(ThenBlock);
2800   ThenGen(CGF);
2801   CGF.EmitBranch(ContBlock);
2802   // Emit the 'else' code if present.
2803   // There is no need to emit line number for unconditional branch.
2804   (void)ApplyDebugLocation::CreateEmpty(CGF);
2805   CGF.EmitBlock(ElseBlock);
2806   ElseGen(CGF);
2807   // There is no need to emit line number for unconditional branch.
2808   (void)ApplyDebugLocation::CreateEmpty(CGF);
2809   CGF.EmitBranch(ContBlock);
2810   // Emit the continuation block for code after the if.
2811   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2812 }
2813 
/// Emit code for an OpenMP 'parallel' call: either a real fork through
/// __kmpc_fork_call or, when the 'if' clause is false at run time, a
/// serialized execution of the outlined function on the current thread.
///
/// \param Loc Source location of the construct.
/// \param OutlinedFn The outlined parallel region body.
/// \param CapturedVars Captured values forwarded as trailing arguments of
/// the outlined function.
/// \param IfCond Condition of the 'if' clause, or null when absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    // Both the gtid and bound-tid pointer arguments point at the zero temp.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    // With an 'if' clause, pick forked vs. serialized execution at run time.
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2870 
2871 // If we're inside an (outlined) parallel region, use the region info's
2872 // thread-ID variable (it is passed in a first argument of the outlined function
2873 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2874 // regular serial code region, get thread ID by calling kmp_int32
2875 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2876 // return the address of that temp.
2877 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2878                                              SourceLocation Loc) {
2879   if (auto *OMPRegionInfo =
2880           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2881     if (OMPRegionInfo->getThreadIDVariable())
2882       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2883 
2884   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2885   QualType Int32Ty =
2886       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2887   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2888   CGF.EmitStoreOfScalar(ThreadID,
2889                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2890 
2891   return ThreadIDTemp;
2892 }
2893 
2894 llvm::Constant *
2895 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2896                                              const llvm::Twine &Name) {
2897   SmallString<256> Buffer;
2898   llvm::raw_svector_ostream Out(Buffer);
2899   Out << Name;
2900   StringRef RuntimeName = Out.str();
2901   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2902   if (Elem.second) {
2903     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2904            "OMP internal variable has different type than requested");
2905     return &*Elem.second;
2906   }
2907 
2908   return Elem.second = new llvm::GlobalVariable(
2909              CGM.getModule(), Ty, /*IsConstant*/ false,
2910              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2911              Elem.first());
2912 }
2913 
2914 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2915   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2916   std::string Name = getName({Prefix, "var"});
2917   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2918 }
2919 
2920 namespace {
2921 /// Common pre(post)-action for different OpenMP constructs.
2922 class CommonActionTy final : public PrePostActionTy {
2923   llvm::Value *EnterCallee;
2924   ArrayRef<llvm::Value *> EnterArgs;
2925   llvm::Value *ExitCallee;
2926   ArrayRef<llvm::Value *> ExitArgs;
2927   bool Conditional;
2928   llvm::BasicBlock *ContBlock = nullptr;
2929 
2930 public:
2931   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2932                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2933                  bool Conditional = false)
2934       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2935         ExitArgs(ExitArgs), Conditional(Conditional) {}
2936   void Enter(CodeGenFunction &CGF) override {
2937     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2938     if (Conditional) {
2939       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2940       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2941       ContBlock = CGF.createBasicBlock("omp_if.end");
2942       // Generate the branch (If-stmt)
2943       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2944       CGF.EmitBlock(ThenBlock);
2945     }
2946   }
2947   void Done(CodeGenFunction &CGF) {
2948     // Emit the rest of blocks/branches
2949     CGF.EmitBranch(ContBlock);
2950     CGF.EmitBlock(ContBlock, true);
2951   }
2952   void Exit(CodeGenFunction &CGF) override {
2953     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2954   }
2955 };
2956 } // anonymous namespace
2957 
2958 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2959                                          StringRef CriticalName,
2960                                          const RegionCodeGenTy &CriticalOpGen,
2961                                          SourceLocation Loc, const Expr *Hint) {
2962   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2963   // CriticalOpGen();
2964   // __kmpc_end_critical(ident_t *, gtid, Lock);
2965   // Prepare arguments and build a call to __kmpc_critical
2966   if (!CGF.HaveInsertPoint())
2967     return;
2968   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2969                          getCriticalRegionLock(CriticalName)};
2970   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2971                                                 std::end(Args));
2972   if (Hint) {
2973     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2974         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2975   }
2976   CommonActionTy Action(
2977       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2978                                  : OMPRTL__kmpc_critical),
2979       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2980   CriticalOpGen.setAction(Action);
2981   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2982 }
2983 
2984 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2985                                        const RegionCodeGenTy &MasterOpGen,
2986                                        SourceLocation Loc) {
2987   if (!CGF.HaveInsertPoint())
2988     return;
2989   // if(__kmpc_master(ident_t *, gtid)) {
2990   //   MasterOpGen();
2991   //   __kmpc_end_master(ident_t *, gtid);
2992   // }
2993   // Prepare arguments and build a call to __kmpc_master
2994   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2995   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2996                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2997                         /*Conditional=*/true);
2998   MasterOpGen.setAction(Action);
2999   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3000   Action.Done(CGF);
3001 }
3002 
3003 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3004                                         SourceLocation Loc) {
3005   if (!CGF.HaveInsertPoint())
3006     return;
3007   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3008   llvm::Value *Args[] = {
3009       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3010       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3011   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3012   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3013     Region->emitUntiedSwitch(CGF);
3014 }
3015 
3016 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3017                                           const RegionCodeGenTy &TaskgroupOpGen,
3018                                           SourceLocation Loc) {
3019   if (!CGF.HaveInsertPoint())
3020     return;
3021   // __kmpc_taskgroup(ident_t *, gtid);
3022   // TaskgroupOpGen();
3023   // __kmpc_end_taskgroup(ident_t *, gtid);
3024   // Prepare arguments and build a call to __kmpc_taskgroup
3025   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3026   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3027                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3028                         Args);
3029   TaskgroupOpGen.setAction(Action);
3030   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3031 }
3032 
3033 /// Given an array of pointers to variables, project the address of a
3034 /// given variable.
3035 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3036                                       unsigned Index, const VarDecl *Var) {
3037   // Pull out the pointer to the variable.
3038   Address PtrAddr =
3039       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
3040   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3041 
3042   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3043   Addr = CGF.Builder.CreateElementBitCast(
3044       Addr, CGF.ConvertTypeForMem(Var->getType()));
3045   return Addr;
3046 }
3047 
/// Emit the helper function passed to __kmpc_copyprivate. It receives two
/// void* arguments, each actually a pointer to an array of void* element
/// addresses, and performs the per-variable assignment
/// *(Type_i*)Dst[i] = *(Type_i*)Src[i] using the provided assignment ops.
///
/// \param ArgsType Pointer type of the void*-array each argument is cast to.
/// \param CopyprivateVars Variables listed in the copyprivate clause.
/// \param DestExprs Destination reference expressions, one per variable.
/// \param SrcExprs Source reference expressions, one per variable.
/// \param AssignmentOps Assignment expressions used to copy each variable.
/// \return The emitted copy function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3101 
/// Emit an OpenMP 'single' region, including copyprivate support:
///   int32 did_it = 0;
///   if (__kmpc_single(...)) { <body>; __kmpc_end_single(...); did_it = 1; }
///   __kmpc_copyprivate(..., <list>, <copy_func>, did_it);   // if needed
///
/// \param CopyprivateVars Variables from the copyprivate clause (may be
/// empty, in which case no did_it flag or copyprivate call is emitted).
/// \param SrcExprs / DstExprs / AssignmentOps Parallel arrays used by the
/// generated copy function; all must match CopyprivateVars in size.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Records whether this thread executed the single region, so the
    // copyprivate broadcast knows who owns the source values.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded 'single' arm)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed in that order into
    // emitCopyprivateCopyFunction's (DestExprs, SrcExprs) parameters —
    // presumably the caller's naming is from the clause's perspective;
    // confirm against the callers of emitSingleRegion.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3183 
3184 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3185                                         const RegionCodeGenTy &OrderedOpGen,
3186                                         SourceLocation Loc, bool IsThreads) {
3187   if (!CGF.HaveInsertPoint())
3188     return;
3189   // __kmpc_ordered(ident_t *, gtid);
3190   // OrderedOpGen();
3191   // __kmpc_end_ordered(ident_t *, gtid);
3192   // Prepare arguments and build a call to __kmpc_ordered
3193   if (IsThreads) {
3194     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3195     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3196                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3197                           Args);
3198     OrderedOpGen.setAction(Action);
3199     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3200     return;
3201   }
3202   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3203 }
3204 
3205 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3206                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3207                                       bool ForceSimpleCall) {
3208   if (!CGF.HaveInsertPoint())
3209     return;
3210   // Build call __kmpc_cancel_barrier(loc, thread_id);
3211   // Build call __kmpc_barrier(loc, thread_id);
3212   unsigned Flags;
3213   if (Kind == OMPD_for)
3214     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3215   else if (Kind == OMPD_sections)
3216     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3217   else if (Kind == OMPD_single)
3218     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3219   else if (Kind == OMPD_barrier)
3220     Flags = OMP_IDENT_BARRIER_EXPL;
3221   else
3222     Flags = OMP_IDENT_BARRIER_IMPL;
3223   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3224   // thread_id);
3225   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3226                          getThreadID(CGF, Loc)};
3227   if (auto *OMPRegionInfo =
3228           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3229     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3230       llvm::Value *Result = CGF.EmitRuntimeCall(
3231           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3232       if (EmitChecks) {
3233         // if (__kmpc_cancel_barrier()) {
3234         //   exit from construct;
3235         // }
3236         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3237         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3238         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3239         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3240         CGF.EmitBlock(ExitBB);
3241         //   exit from construct;
3242         CodeGenFunction::JumpDest CancelDestination =
3243             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3244         CGF.EmitBranchThroughCleanup(CancelDestination);
3245         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3246       }
3247       return;
3248     }
3249   }
3250   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3251 }
3252 
3253 /// Map the OpenMP loop schedule to the runtime enumeration.
3254 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3255                                           bool Chunked, bool Ordered) {
3256   switch (ScheduleKind) {
3257   case OMPC_SCHEDULE_static:
3258     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3259                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3260   case OMPC_SCHEDULE_dynamic:
3261     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3262   case OMPC_SCHEDULE_guided:
3263     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3264   case OMPC_SCHEDULE_runtime:
3265     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3266   case OMPC_SCHEDULE_auto:
3267     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3268   case OMPC_SCHEDULE_unknown:
3269     assert(!Chunked && "chunk was specified but schedule kind not known");
3270     return Ordered ? OMP_ord_static : OMP_sch_static;
3271   }
3272   llvm_unreachable("Unexpected runtime schedule");
3273 }
3274 
3275 /// Map the OpenMP distribute schedule to the runtime enumeration.
3276 static OpenMPSchedType
3277 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3278   // only static is allowed for dist_schedule
3279   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3280 }
3281 
3282 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3283                                          bool Chunked) const {
3284   OpenMPSchedType Schedule =
3285       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3286   return Schedule == OMP_sch_static;
3287 }
3288 
3289 bool CGOpenMPRuntime::isStaticNonchunked(
3290     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3291   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3292   return Schedule == OMP_dist_sch_static;
3293 }
3294 
3295 
3296 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3297   OpenMPSchedType Schedule =
3298       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3299   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3300   return Schedule != OMP_sch_static;
3301 }
3302 
3303 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3304                                   OpenMPScheduleClauseModifier M1,
3305                                   OpenMPScheduleClauseModifier M2) {
3306   int Modifier = 0;
3307   switch (M1) {
3308   case OMPC_SCHEDULE_MODIFIER_monotonic:
3309     Modifier = OMP_sch_modifier_monotonic;
3310     break;
3311   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3312     Modifier = OMP_sch_modifier_nonmonotonic;
3313     break;
3314   case OMPC_SCHEDULE_MODIFIER_simd:
3315     if (Schedule == OMP_sch_static_chunked)
3316       Schedule = OMP_sch_static_balanced_chunked;
3317     break;
3318   case OMPC_SCHEDULE_MODIFIER_last:
3319   case OMPC_SCHEDULE_MODIFIER_unknown:
3320     break;
3321   }
3322   switch (M2) {
3323   case OMPC_SCHEDULE_MODIFIER_monotonic:
3324     Modifier = OMP_sch_modifier_monotonic;
3325     break;
3326   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3327     Modifier = OMP_sch_modifier_nonmonotonic;
3328     break;
3329   case OMPC_SCHEDULE_MODIFIER_simd:
3330     if (Schedule == OMP_sch_static_chunked)
3331       Schedule = OMP_sch_static_balanced_chunked;
3332     break;
3333   case OMPC_SCHEDULE_MODIFIER_last:
3334   case OMPC_SCHEDULE_MODIFIER_unknown:
3335     break;
3336   }
3337   return Schedule | Modifier;
3338 }
3339 
/// Emit the __kmpc_dispatch_init_* call that initializes a dynamically
/// scheduled (or ordered) worksharing loop for the current thread.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Nothing to emit if the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  // Map the clause schedule (plus chunking and ordering) to the runtime's
  // schedule enumeration.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules must go through the static-init path
  // (emitForStaticInit) instead of dispatch-init.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3371 
/// Shared helper that emits the __kmpc_for_static_init_* call for both
/// worksharing loops/sections (emitForStaticInit) and 'distribute'
/// (emitDistributeStaticInit). Only static schedules are valid here.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  // Nothing to emit if the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops take the dispatch-init path, never static-init.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only legal for the non-chunked schedule variants.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3420 
/// Emit static-init for a worksharing construct ('for' or 'sections').
/// The ident_t location is tagged with OMP_IDENT_WORK_LOOP or
/// OMP_IDENT_WORK_SECTIONS according to the directive kind.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  // Pick the runtime entry matching the IV width/signedness, then defer the
  // actual call emission to the shared helper.
  llvm::Constant *StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
3440 
/// Emit static-init for a 'distribute' construct. Unlike emitForStaticInit,
/// dist_schedule accepts no monotonicity modifiers, so both modifier slots
/// are passed as 'unknown'.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  // Tag the location as 'distribute' work for the runtime/tools.
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Constant *StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
3456 
3457 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3458                                           SourceLocation Loc,
3459                                           OpenMPDirectiveKind DKind) {
3460   if (!CGF.HaveInsertPoint())
3461     return;
3462   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3463   llvm::Value *Args[] = {
3464       emitUpdateLocation(CGF, Loc,
3465                          isOpenMPDistributeDirective(DKind)
3466                              ? OMP_IDENT_WORK_DISTRIBUTE
3467                              : isOpenMPLoopDirective(DKind)
3468                                    ? OMP_IDENT_WORK_LOOP
3469                                    : OMP_IDENT_WORK_SECTIONS),
3470       getThreadID(CGF, Loc)};
3471   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3472                       Args);
3473 }
3474 
3475 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3476                                                  SourceLocation Loc,
3477                                                  unsigned IVSize,
3478                                                  bool IVSigned) {
3479   if (!CGF.HaveInsertPoint())
3480     return;
3481   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3482   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3483   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3484 }
3485 
/// Emit a call to __kmpc_dispatch_next_* requesting the next chunk of a
/// dynamically scheduled loop. The runtime's kmp_int32 result is converted
/// to the natural boolean type before being returned to the caller.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // Convert the runtime's signed-int32 return value into a bool.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
3509 
3510 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3511                                            llvm::Value *NumThreads,
3512                                            SourceLocation Loc) {
3513   if (!CGF.HaveInsertPoint())
3514     return;
3515   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3516   llvm::Value *Args[] = {
3517       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3518       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3519   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3520                       Args);
3521 }
3522 
/// Emit a __kmpc_push_proc_bind call that communicates the proc_bind clause
/// value to the runtime before the associated parallel region starts.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  // NOTE(review): these numeric values are ABI — presumably they mirror the
  // runtime's kmp_proc_bind_t numbering (verify against kmp.h); do not
  // reorder the enumerators.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  // Translate the clause kind into the runtime's value; only master/close/
  // spread are representable in source, the rest exist for ABI completeness.
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}
3557 
3558 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3559                                 SourceLocation Loc) {
3560   if (!CGF.HaveInsertPoint())
3561     return;
3562   // Build call void __kmpc_flush(ident_t *loc)
3563   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3564                       emitUpdateLocation(CGF, Loc));
3565 }
3566 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// NOTE(review): the enumerator order defines the field indices used to
/// address into the generated task record, so it presumably must stay in
/// sync with the record layout built by the task-codegen code elsewhere in
/// this file — confirm before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3592 
3593 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3594   return OffloadEntriesTargetRegion.empty() &&
3595          OffloadEntriesDeviceGlobalVar.empty();
3596 }
3597 
3598 /// Initialize target region entry.
3599 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3600     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3601                                     StringRef ParentName, unsigned LineNum,
3602                                     unsigned Order) {
3603   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3604                                              "only required for the device "
3605                                              "code generation.");
3606   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3607       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3608                                    OMPTargetRegionEntryTargetRegion);
3609   ++OffloadingEntriesNum;
3610 }
3611 
/// Register a target region entry. On the device side the entry must have
/// been initialized earlier (a missing entry is a hard error); on the host
/// side a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // Mismatch between host and device compilation: report and bail out
      // without touching the tables.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    // Fill in the previously initialized placeholder entry.
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: create the entry, ordered by the running counter.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3639 
3640 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3641     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3642     unsigned LineNum) const {
3643   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3644   if (PerDevice == OffloadEntriesTargetRegion.end())
3645     return false;
3646   auto PerFile = PerDevice->second.find(FileID);
3647   if (PerFile == PerDevice->second.end())
3648     return false;
3649   auto PerParentName = PerFile->second.find(ParentName);
3650   if (PerParentName == PerFile->second.end())
3651     return false;
3652   auto PerLine = PerParentName->second.find(LineNum);
3653   if (PerLine == PerParentName->second.end())
3654     return false;
3655   // Fail if this entry is already registered.
3656   if (PerLine->second.getAddress() || PerLine->second.getID())
3657     return false;
3658   return true;
3659 }
3660 
3661 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3662     const OffloadTargetRegionEntryInfoActTy &Action) {
3663   // Scan all target region entries and perform the provided action.
3664   for (const auto &D : OffloadEntriesTargetRegion)
3665     for (const auto &F : D.second)
3666       for (const auto &P : F.second)
3667         for (const auto &L : P.second)
3668           Action(D.first, F.first, P.first(), L.first, L.second);
3669 }
3670 
3671 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3672     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3673                                        OMPTargetGlobalVarEntryKind Flags,
3674                                        unsigned Order) {
3675   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3676                                              "only required for the device "
3677                                              "code generation.");
3678   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3679   ++OffloadingEntriesNum;
3680 }
3681 
/// Register a declare-target global variable entry. On the device side the
/// entry must have been initialized earlier; on the host side a fresh entry
/// is created unless one is already registered for this name.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Already fully registered with this address — nothing more to do.
    // NOTE(review): hasDeviceGlobalVarEntryInfo is defined elsewhere in the
    // file; presumably it checks full registration — confirm.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
      return;
    Entry.setAddress(Addr);
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
  } else {
    // Host side: never overwrite an existing registration.
    if (hasDeviceGlobalVarEntryInfo(VarName))
      return;
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3706 
3707 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3708     actOnDeviceGlobalVarEntriesInfo(
3709         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3710   // Scan all target region entries and perform the provided action.
3711   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3712     Action(E.getKey(), E.getValue());
3713 }
3714 
/// Create the host-side registration machinery for offloading: the device
/// image array, the binary descriptor, and the pair of functions that call
/// __tgt_register_lib / __tgt_unregister_lib at program startup/shutdown.
/// Returns the registration function (to be run as a global ctor), or
/// nullptr when nothing needs registering.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    // NOTE: the trailing empty name component presumably leaves a trailing
    // separator so the device triple can be appended directly — confirm
    // against getName's joining behavior.
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // The unregister function is created first because the register function
  // references it when installing the global dtor below.
  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort the triple parts so the name (and thus the comdat key) is stable
    // regardless of the order targets were specified in.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    // NOTE(review): this local shadows the outer 'Descriptor' string above.
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
3865 
/// Emit one __tgt_offload_entry record (ID, name, size, flags, reserved)
/// into the "omp_offloading.entries" section so the linker-defined begin/end
/// symbols bracket it for the runtime.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields of the entry struct: void *addr, char *name, size_t size,
  // int32_t flags, int32_t reserved (always zero here).
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  std::string Section = getName({"omp_offloading", "entries"});
  Entry->setSection(Section);
}
3896 
3897 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3898   // Emit the offloading entries and metadata so that the device codegen side
3899   // can easily figure out what to emit. The produced metadata looks like
3900   // this:
3901   //
3902   // !omp_offload.info = !{!1, ...}
3903   //
3904   // Right now we only generate metadata for function that contain target
3905   // regions.
3906 
3907   // If we do not have entries, we don't need to do anything.
3908   if (OffloadEntriesInfoManager.empty())
3909     return;
3910 
3911   llvm::Module &M = CGM.getModule();
3912   llvm::LLVMContext &C = M.getContext();
3913   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
3914       OrderedEntries(OffloadEntriesInfoManager.size());
3915 
3916   // Auxiliary methods to create metadata values and strings.
3917   auto &&GetMDInt = [this](unsigned V) {
3918     return llvm::ConstantAsMetadata::get(
3919         llvm::ConstantInt::get(CGM.Int32Ty, V));
3920   };
3921 
3922   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3923 
3924   // Create the offloading info metadata node.
3925   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3926 
3927   // Create function that emits metadata for each target region entry;
3928   auto &&TargetRegionMetadataEmitter =
3929       [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
3930           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3931           unsigned Line,
3932           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3933         // Generate metadata for target regions. Each entry of this metadata
3934         // contains:
3935         // - Entry 0 -> Kind of this type of metadata (0).
3936         // - Entry 1 -> Device ID of the file where the entry was identified.
3937         // - Entry 2 -> File ID of the file where the entry was identified.
3938         // - Entry 3 -> Mangled name of the function where the entry was
3939         // identified.
3940         // - Entry 4 -> Line in the file where the entry was identified.
3941         // - Entry 5 -> Order the entry was created.
3942         // The first element of the metadata node is the kind.
3943         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3944                                  GetMDInt(FileID),      GetMDString(ParentName),
3945                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3946 
3947         // Save this entry in the right position of the ordered entries array.
3948         OrderedEntries[E.getOrder()] = &E;
3949 
3950         // Add metadata to the named metadata node.
3951         MD->addOperand(llvm::MDNode::get(C, Ops));
3952       };
3953 
3954   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3955       TargetRegionMetadataEmitter);
3956 
3957   // Create function that emits metadata for each device global variable entry;
3958   auto &&DeviceGlobalVarMetadataEmitter =
3959       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3960        MD](StringRef MangledName,
3961            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3962                &E) {
3963         // Generate metadata for global variables. Each entry of this metadata
3964         // contains:
3965         // - Entry 0 -> Kind of this type of metadata (1).
3966         // - Entry 1 -> Mangled name of the variable.
3967         // - Entry 2 -> Declare target kind.
3968         // - Entry 3 -> Order the entry was created.
3969         // The first element of the metadata node is the kind.
3970         llvm::Metadata *Ops[] = {
3971             GetMDInt(E.getKind()), GetMDString(MangledName),
3972             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3973 
3974         // Save this entry in the right position of the ordered entries array.
3975         OrderedEntries[E.getOrder()] = &E;
3976 
3977         // Add metadata to the named metadata node.
3978         MD->addOperand(llvm::MDNode::get(C, Ops));
3979       };
3980 
3981   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3982       DeviceGlobalVarMetadataEmitter);
3983 
3984   for (const auto *E : OrderedEntries) {
3985     assert(E && "All ordered entries must exist!");
3986     if (const auto *CE =
3987             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3988                 E)) {
3989       if (!CE->getID() || !CE->getAddress()) {
3990         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3991             DiagnosticsEngine::Error,
3992             "Offloading entry for target region is incorrect: either the "
3993             "address or the ID is invalid.");
3994         CGM.getDiags().Report(DiagID);
3995         continue;
3996       }
3997       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3998                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3999     } else if (const auto *CE =
4000                    dyn_cast<OffloadEntriesInfoManagerTy::
4001                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4002       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4003           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4004               CE->getFlags());
4005       switch (Flags) {
4006       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4007         if (!CE->getAddress()) {
4008           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4009               DiagnosticsEngine::Error,
4010               "Offloading entry for declare target variable is incorrect: the "
4011               "address is invalid.");
4012           CGM.getDiags().Report(DiagID);
4013           continue;
4014         }
4015         // The vaiable has no definition - no need to add the entry.
4016         if (CE->getVarSize().isZero())
4017           continue;
4018         break;
4019       }
4020       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4021         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4022                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4023                "Declaret target link address is set.");
4024         if (CGM.getLangOpts().OpenMPIsDevice)
4025           continue;
4026         if (!CE->getAddress()) {
4027           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4028               DiagnosticsEngine::Error,
4029               "Offloading entry for declare target variable is incorrect: the "
4030               "address is invalid.");
4031           CGM.getDiags().Report(DiagID);
4032           continue;
4033         }
4034         break;
4035       }
4036       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4037                          CE->getVarSize().getQuantity(), Flags,
4038                          CE->getLinkage());
4039     } else {
4040       llvm_unreachable("Unsupported entry kind.");
4041     }
4042   }
4043 }
4044 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume host-side metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to do if no host IR file was provided on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a module owned by a temporary context; only its
  // named metadata is inspected below.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // The host compilation may not have produced any offloading metadata.
  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helper to read operand Idx of the current node as an integer constant.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Helper to read operand Idx of the current node as a string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the layout of the remaining operands
    // depends on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4113 
4114 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4115   if (!KmpRoutineEntryPtrTy) {
4116     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4117     ASTContext &C = CGM.getContext();
4118     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4119     FunctionProtoType::ExtProtoInfo EPI;
4120     KmpRoutineEntryPtrQTy = C.getPointerType(
4121         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4122     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4123   }
4124 }
4125 
4126 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4127   // Make sure the type of the entry is already created. This is the type we
4128   // have to create:
4129   // struct __tgt_offload_entry{
4130   //   void      *addr;       // Pointer to the offload entry info.
4131   //                          // (function or global)
4132   //   char      *name;       // Name of the function or global.
4133   //   size_t     size;       // Size of the entry info (0 if it a function).
4134   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4135   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4136   // };
4137   if (TgtOffloadEntryQTy.isNull()) {
4138     ASTContext &C = CGM.getContext();
4139     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4140     RD->startDefinition();
4141     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4142     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4143     addFieldToRecordDecl(C, RD, C.getSizeType());
4144     addFieldToRecordDecl(
4145         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4146     addFieldToRecordDecl(
4147         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4148     RD->completeDefinition();
4149     RD->addAttr(PackedAttr::CreateImplicit(C));
4150     TgtOffloadEntryQTy = C.getRecordType(RD);
4151   }
4152   return TgtOffloadEntryQTy;
4153 }
4154 
4155 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4156   // These are the types we need to build:
4157   // struct __tgt_device_image{
4158   // void   *ImageStart;       // Pointer to the target code start.
4159   // void   *ImageEnd;         // Pointer to the target code end.
4160   // // We also add the host entries to the device image, as it may be useful
4161   // // for the target runtime to have access to that information.
4162   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4163   //                                       // the entries.
4164   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4165   //                                       // entries (non inclusive).
4166   // };
4167   if (TgtDeviceImageQTy.isNull()) {
4168     ASTContext &C = CGM.getContext();
4169     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4170     RD->startDefinition();
4171     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4172     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4173     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4174     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4175     RD->completeDefinition();
4176     TgtDeviceImageQTy = C.getRecordType(RD);
4177   }
4178   return TgtDeviceImageQTy;
4179 }
4180 
4181 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4182   // struct __tgt_bin_desc{
4183   //   int32_t              NumDevices;      // Number of devices supported.
4184   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4185   //                                         // (one per device).
4186   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4187   //                                         // entries.
4188   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4189   //                                         // entries (non inclusive).
4190   // };
4191   if (TgtBinaryDescriptorQTy.isNull()) {
4192     ASTContext &C = CGM.getContext();
4193     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4194     RD->startDefinition();
4195     addFieldToRecordDecl(
4196         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4197     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4198     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4199     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4200     RD->completeDefinition();
4201     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4202   }
4203   return TgtBinaryDescriptorQTy;
4204 }
4205 
namespace {
/// Bundles the declarations involved in privatizing one variable in a
/// task-based directive.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // The variable as it appears in the enclosing context.
  const VarDecl *Original;
  // The task-local copy of the variable.
  const VarDecl *PrivateCopy;
  // Helper declaration used when initializing the private copy element by
  // element (may be null when no such initialization is needed).
  const VarDecl *PrivateElemInit;
};
/// A private variable's alignment paired with its privatization helpers.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
4218 
4219 static RecordDecl *
4220 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4221   if (!Privates.empty()) {
4222     ASTContext &C = CGM.getContext();
4223     // Build struct .kmp_privates_t. {
4224     //         /*  private vars  */
4225     //       };
4226     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4227     RD->startDefinition();
4228     for (const auto &Pair : Privates) {
4229       const VarDecl *VD = Pair.second.Original;
4230       QualType Type = VD->getType().getNonReferenceType();
4231       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4232       if (VD->hasAttrs()) {
4233         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4234              E(VD->getAttrs().end());
4235              I != E; ++I)
4236           FD->addAttr(*I);
4237       }
4238     }
4239     RD->completeDefinition();
4240     return RD;
4241   }
4242   return nullptr;
4243 }
4244 
4245 static RecordDecl *
4246 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4247                          QualType KmpInt32Ty,
4248                          QualType KmpRoutineEntryPointerQTy) {
4249   ASTContext &C = CGM.getContext();
4250   // Build struct kmp_task_t {
4251   //         void *              shareds;
4252   //         kmp_routine_entry_t routine;
4253   //         kmp_int32           part_id;
4254   //         kmp_cmplrdata_t data1;
4255   //         kmp_cmplrdata_t data2;
4256   // For taskloops additional fields:
4257   //         kmp_uint64          lb;
4258   //         kmp_uint64          ub;
4259   //         kmp_int64           st;
4260   //         kmp_int32           liter;
4261   //         void *              reductions;
4262   //       };
4263   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4264   UD->startDefinition();
4265   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4266   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4267   UD->completeDefinition();
4268   QualType KmpCmplrdataTy = C.getRecordType(UD);
4269   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4270   RD->startDefinition();
4271   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4272   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4273   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4274   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4275   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4276   if (isOpenMPTaskLoopDirective(Kind)) {
4277     QualType KmpUInt64Ty =
4278         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4279     QualType KmpInt64Ty =
4280         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4281     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4282     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4283     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4284     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4285     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4286   }
4287   RD->completeDefinition();
4288   return RD;
4289 }
4290 
4291 static RecordDecl *
4292 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4293                                      ArrayRef<PrivateDataTy> Privates) {
4294   ASTContext &C = CGM.getContext();
4295   // Build struct kmp_task_t_with_privates {
4296   //         kmp_task_t task_data;
4297   //         .kmp_privates_t. privates;
4298   //       };
4299   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4300   RD->startDefinition();
4301   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4302   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4303     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4304   RD->completeDefinition();
4305   return RD;
4306 }
4307 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the signature: kmp_int32 (kmp_int32 gtid,
  //                                 kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  // Load the global thread id argument.
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task descriptor pointer argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the task function can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the expected shareds type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field exists only when the task has private variables;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, loaded
    // from the corresponding kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4422 
/// Emit a function that runs the destructors of all non-trivially
/// destructible private copies stored in a kmp_task_t_with_privates record.
/// The emitted function has the signature
///   kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task descriptor pointer argument.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Schedule a destructor call for every field whose type needs one.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4471 
4472 /// Emit a privates mapping function for correct handling of private and
4473 /// firstprivate variables.
4474 /// \code
4475 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4476 /// **noalias priv1,...,  <tyn> **noalias privn) {
4477 ///   *priv1 = &.privates.priv1;
4478 ///   ...;
4479 ///   *privn = &.privates.privn;
4480 /// }
4481 /// \endcode
4482 static llvm::Value *
4483 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4484                                ArrayRef<const Expr *> PrivateVars,
4485                                ArrayRef<const Expr *> FirstprivateVars,
4486                                ArrayRef<const Expr *> LastprivateVars,
4487                                QualType PrivatesQTy,
4488                                ArrayRef<PrivateDataTy> Privates) {
4489   ASTContext &C = CGM.getContext();
4490   FunctionArgList Args;
4491   ImplicitParamDecl TaskPrivatesArg(
4492       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4493       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4494       ImplicitParamDecl::Other);
4495   Args.push_back(&TaskPrivatesArg);
4496   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4497   unsigned Counter = 1;
4498   for (const Expr *E : PrivateVars) {
4499     Args.push_back(ImplicitParamDecl::Create(
4500         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4501         C.getPointerType(C.getPointerType(E->getType()))
4502             .withConst()
4503             .withRestrict(),
4504         ImplicitParamDecl::Other));
4505     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4506     PrivateVarsPos[VD] = Counter;
4507     ++Counter;
4508   }
4509   for (const Expr *E : FirstprivateVars) {
4510     Args.push_back(ImplicitParamDecl::Create(
4511         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4512         C.getPointerType(C.getPointerType(E->getType()))
4513             .withConst()
4514             .withRestrict(),
4515         ImplicitParamDecl::Other));
4516     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4517     PrivateVarsPos[VD] = Counter;
4518     ++Counter;
4519   }
4520   for (const Expr *E : LastprivateVars) {
4521     Args.push_back(ImplicitParamDecl::Create(
4522         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4523         C.getPointerType(C.getPointerType(E->getType()))
4524             .withConst()
4525             .withRestrict(),
4526         ImplicitParamDecl::Other));
4527     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4528     PrivateVarsPos[VD] = Counter;
4529     ++Counter;
4530   }
4531   const auto &TaskPrivatesMapFnInfo =
4532       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4533   llvm::FunctionType *TaskPrivatesMapTy =
4534       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4535   std::string Name =
4536       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4537   auto *TaskPrivatesMap = llvm::Function::Create(
4538       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4539       &CGM.getModule());
4540   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4541                                     TaskPrivatesMapFnInfo);
4542   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4543   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4544   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4545   CodeGenFunction CGF(CGM);
4546   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4547                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4548 
4549   // *privi = &.privates.privi;
4550   LValue Base = CGF.EmitLoadOfPointerLValue(
4551       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4552       TaskPrivatesArg.getType()->castAs<PointerType>());
4553   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4554   Counter = 0;
4555   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4556     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4557     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4558     LValue RefLVal =
4559         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4560     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4561         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4562     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4563     ++Counter;
4564   }
4565   CGF.FinishFunction();
4566   return TaskPrivatesMap;
4567 }
4568 
4569 static bool stable_sort_comparator(const PrivateDataTy P1,
4570                                    const PrivateDataTy P2) {
4571   return P1.first > P2.first;
4572 }
4573 
4574 /// Emit initialization for private variables in task-based directives.
4575 static void emitPrivatesInit(CodeGenFunction &CGF,
4576                              const OMPExecutableDirective &D,
4577                              Address KmpTaskSharedsPtr, LValue TDBase,
4578                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4579                              QualType SharedsTy, QualType SharedsPtrTy,
4580                              const OMPTaskDataTy &Data,
4581                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4582   ASTContext &C = CGF.getContext();
4583   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4584   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4585   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4586                                  ? OMPD_taskloop
4587                                  : OMPD_task;
4588   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4589   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4590   LValue SrcBase;
4591   bool IsTargetTask =
4592       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4593       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4594   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4595   // PointersArray and SizesArray. The original variables for these arrays are
4596   // not captured and we get their addresses explicitly.
4597   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4598       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4599     SrcBase = CGF.MakeAddrLValue(
4600         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4601             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4602         SharedsTy);
4603   }
4604   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4605   for (const PrivateDataTy &Pair : Privates) {
4606     const VarDecl *VD = Pair.second.PrivateCopy;
4607     const Expr *Init = VD->getAnyInitializer();
4608     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4609                              !CGF.isTrivialInitializer(Init)))) {
4610       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4611       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4612         const VarDecl *OriginalVD = Pair.second.Original;
4613         // Check if the variable is the target-based BasePointersArray,
4614         // PointersArray or SizesArray.
4615         LValue SharedRefLValue;
4616         QualType Type = OriginalVD->getType();
4617         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4618         if (IsTargetTask && !SharedField) {
4619           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4620                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4621                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4622                          ->getNumParams() == 0 &&
4623                  isa<TranslationUnitDecl>(
4624                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4625                          ->getDeclContext()) &&
4626                  "Expected artificial target data variable.");
4627           SharedRefLValue =
4628               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4629         } else {
4630           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4631           SharedRefLValue = CGF.MakeAddrLValue(
4632               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4633               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4634               SharedRefLValue.getTBAAInfo());
4635         }
4636         if (Type->isArrayType()) {
4637           // Initialize firstprivate array.
4638           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4639             // Perform simple memcpy.
4640             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4641           } else {
4642             // Initialize firstprivate array using element-by-element
4643             // initialization.
4644             CGF.EmitOMPAggregateAssign(
4645                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4646                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4647                                                   Address SrcElement) {
4648                   // Clean up any temporaries needed by the initialization.
4649                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4650                   InitScope.addPrivate(
4651                       Elem, [SrcElement]() -> Address { return SrcElement; });
4652                   (void)InitScope.Privatize();
4653                   // Emit initialization for single element.
4654                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4655                       CGF, &CapturesInfo);
4656                   CGF.EmitAnyExprToMem(Init, DestElement,
4657                                        Init->getType().getQualifiers(),
4658                                        /*IsInitializer=*/false);
4659                 });
4660           }
4661         } else {
4662           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4663           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4664             return SharedRefLValue.getAddress();
4665           });
4666           (void)InitScope.Privatize();
4667           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4668           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4669                              /*capturedByInit=*/false);
4670         }
4671       } else {
4672         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4673       }
4674     }
4675     ++FI;
4676   }
4677 }
4678 
4679 /// Check if duplication function is required for taskloops.
4680 static bool checkInitIsRequired(CodeGenFunction &CGF,
4681                                 ArrayRef<PrivateDataTy> Privates) {
4682   bool InitRequired = false;
4683   for (const PrivateDataTy &Pair : Privates) {
4684     const VarDecl *VD = Pair.second.PrivateCopy;
4685     const Expr *Init = VD->getAnyInitializer();
4686     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4687                                     !CGF.isTrivialInitializer(Init));
4688     if (InitRequired)
4689       break;
4690   }
4691   return InitRequired;
4692 }
4693 
4694 
4695 /// Emit task_dup function (for initialization of
4696 /// private/firstprivate/lastprivate vars and last_iter flag)
4697 /// \code
4698 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4699 /// lastpriv) {
4700 /// // setup lastprivate flag
4701 ///    task_dst->last = lastpriv;
4702 /// // could be constructor calls here...
4703 /// }
4704 /// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the argument list matching the runtime's task_dup entry:
  // (kmp_task_t *task_dst, kmp_task_t *task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  // The function is internal to this TU; the runtime only sees its address.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Base lvalue for the destination task record (task_dst).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the SOURCE task's shareds, so load the
  // shareds pointer out of task_src only when there are firstprivate vars.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // ForDup=true: only non-trivial constructor-based initializations are
  // re-emitted when duplicating a task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4773 
4774 /// Checks if destructor function is required to be generated.
4775 /// \return true if cleanups are required, false otherwise.
4776 static bool
4777 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4778   bool NeedsCleanup = false;
4779   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4780   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4781   for (const FieldDecl *FD : PrivateRD->fields()) {
4782     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4783     if (NeedsCleanup)
4784       break;
4785   }
4786   return NeedsCleanup;
4787 }
4788 
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Collect private, firstprivate and lastprivate copies, each paired with its
  // declared alignment. Only firstprivates carry a PrivateElemInit expression.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable sort keeps source order among equally-aligned privates so field
  // indices stay deterministic.
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives use a wider kmp_task_t (with bounds/stride fields),
  // cached separately from the plain task record type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter type is taken from the 4th parameter of the
  // already-built task outlined function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Value *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The 'final' clause may be a runtime expression (select at runtime) or a
  // compile-time constant (fold into the flags directly).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup function if lastprivates or
    // non-trivial initializations are present.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4989 
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object first.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Lazily build the kmp_depend_info record type the first time it is
    // needed; afterwards reuse the cached KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section, compute the byte size as
        // (&section_end + 1) - &section_begin.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointing at its first element for the
    // runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if(true)' path: enqueue the task via the runtime (with or without
  // dependences).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part_id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if(false)' path: wait for dependences, then execute the task body
  // immediately (undeferred), bracketed by begin_if0/complete_if0 calls.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5183 
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate and initialize the kmp_task_t object (taskloop variant).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike emitTaskCall, the 'if' clause is passed to the runtime as an
  // integer flag rather than generating two code paths.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // record from the loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No task reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kinds understood by __kmpc_taskloop's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getNullValue(
          CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5264 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen (used by the atomic reduction generator; may be null).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length section.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations; second incoming values are wired up after the backedge
  // below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so that the
  // generated reduction operation applies to a single array element.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5344 
5345 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5346 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5347 /// UDR combiner function.
5348 static void emitReductionCombiner(CodeGenFunction &CGF,
5349                                   const Expr *ReductionOp) {
5350   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5351     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5352       if (const auto *DRE =
5353               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5354         if (const auto *DRD =
5355                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5356           std::pair<llvm::Function *, llvm::Function *> Reduction =
5357               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5358           RValue Func = RValue::get(Reduction.first);
5359           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5360           CGF.EmitIgnoredExpr(ReductionOp);
5361           return;
5362         }
5363   CGF.EmitIgnoredExpr(ReductionOp);
5364 }
5365 
/// Emits the outlined function that combines two lists of reduction items:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); ...
/// }
/// \endcode
/// Both arguments point to arrays of void* pointers to the actual reduction
/// items (the RedList layout built by emitReduction, including extra slots
/// that hold VLA sizes).
llvm::Value *CGOpenMPRuntime::emitReductionFunction(
    CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the LHS/RHS variables referenced by the reduction ops to the
  // corresponding slots of the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // A VLA's size occupies an extra slot in the array, stored as a
      // pointer-sized integer; skip it for the next item.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5458 
5459 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5460                                                   const Expr *ReductionOp,
5461                                                   const Expr *PrivateRef,
5462                                                   const DeclRefExpr *LHS,
5463                                                   const DeclRefExpr *RHS) {
5464   if (PrivateRef->getType()->isArrayType()) {
5465     // Emit reduction for array section.
5466     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5467     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5468     EmitOMPAggregateReduction(
5469         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5470         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5471           emitReductionCombiner(CGF, ReductionOp);
5472         });
5473   } else {
5474     // Emit reduction for array subscript or single variable.
5475     emitReductionCombiner(CGF, ReductionOp);
5476   }
5477 }
5478 
/// Emits the code that performs an OpenMP reduction at the end of a region:
/// builds the RedList of pointers to the private copies, calls
/// __kmpc_reduce{_nowait} and, depending on the returned case, combines the
/// privates into the originals either directly (with the generated
/// reduce_func) or atomically. See the pseudo-code sketch below.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit without a valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Combine the privates directly into the originals; no runtime calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The VLA element count is smuggled through the extra slot as a
      // pointer-sized integer.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Value *ReductionFn = emitReductionFunction(
      CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Reduction ops have the form 'x = x op e'; note that this pointer 'BO'
      // intentionally shadows the opcode variable above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Materialize the old value of X in a temporary remapped to
                // VD so the update expression reads it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5784 
5785 /// Generates unique name for artificial threadprivate variables.
5786 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5787 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5788                                       const Expr *Ref) {
5789   SmallString<256> Buffer;
5790   llvm::raw_svector_ostream Out(Buffer);
5791   const clang::DeclRefExpr *DE;
5792   const VarDecl *D = ::getBaseDecl(Ref, DE);
5793   if (!D)
5794     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5795   D = D->getCanonicalDecl();
5796   std::string Name = CGM.getOpenMPRuntime().getName(
5797       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5798   Out << Prefix << Name << "_"
5799       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5800   return Out.str();
5801 }
5802 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction item codegen helper.
/// \param N Index of the reduction item within \p RCG.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No original item is needed; pass a null lvalue.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5869 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction item codegen helper.
/// \param N Index of the reduction item within \p RCG.
/// \param ReductionOp Combiner expression; \p LHS and \p RHS are the
/// variables it references, remapped below to the function arguments.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5947 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param RCG Reduction item codegen helper.
/// \param N Index of the reduction item within \p RCG.
/// \returns The finalizer function, or nullptr when the reduction item
/// requires no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor work needed -> no finalizer function.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
5996 
/// Emits the array of kmp_task_red_input_t descriptors — one per reduction
/// item — and the call to __kmpc_task_reduction_init that registers them with
/// the runtime. Returns the taskgroup reduction descriptor produced by the
/// runtime call, or nullptr when there is no insertion point or no reduction
/// variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_size = <size of the item in chars>;
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // A custom initializer also forces delayed creation (see comment above).
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // (emitReduceFiniFunction returns null when no cleanups are needed.)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // (1 is stored instead when delayed creation is required.)
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6101 
6102 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6103                                               SourceLocation Loc,
6104                                               ReductionCodeGen &RCG,
6105                                               unsigned N) {
6106   auto Sizes = RCG.getSizes(N);
6107   // Emit threadprivate global variable if the type is non-constant
6108   // (Sizes.second = nullptr).
6109   if (Sizes.second) {
6110     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6111                                                      /*isSigned=*/false);
6112     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6113         CGF, CGM.getContext().getSizeType(),
6114         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6115     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6116   }
6117   // Store address of the original reduction item if custom initializer is used.
6118   if (RCG.usesReductionInitializer(N)) {
6119     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6120         CGF, CGM.getContext().VoidPtrTy,
6121         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6122     CGF.Builder.CreateStore(
6123         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6124             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6125         SharedAddr, /*IsVolatile=*/false);
6126   }
6127 }
6128 
6129 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6130                                               SourceLocation Loc,
6131                                               llvm::Value *ReductionsPtr,
6132                                               LValue SharedLVal) {
6133   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6134   // *d);
6135   llvm::Value *Args[] = {
6136       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6137                                 /*isSigned=*/true),
6138       ReductionsPtr,
6139       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6140                                                       CGM.VoidPtrTy)};
6141   return Address(
6142       CGF.EmitRuntimeCall(
6143           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6144       SharedLVal.getAlignment());
6145 }
6146 
6147 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6148                                        SourceLocation Loc) {
6149   if (!CGF.HaveInsertPoint())
6150     return;
6151   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6152   // global_tid);
6153   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6154   // Ignore return result until untied tasks are supported.
6155   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6156   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6157     Region->emitUntiedSwitch(CGF);
6158 }
6159 
6160 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6161                                            OpenMPDirectiveKind InnerKind,
6162                                            const RegionCodeGenTy &CodeGen,
6163                                            bool HasCancel) {
6164   if (!CGF.HaveInsertPoint())
6165     return;
6166   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6167   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6168 }
6169 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument to the
/// __kmpc_cancel/__kmpc_cancellationpoint runtime calls (see
/// emitCancelCall/emitCancellationPointCall below).
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation region specified.
  CancelParallel = 1,  // 'parallel' cancellation region.
  CancelLoop = 2,      // Worksharing-loop ('for') cancellation region.
  CancelSections = 3,  // 'sections' cancellation region.
  CancelTaskgroup = 4  // 'taskgroup' cancellation region.
};
} // anonymous namespace
6179 
6180 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6181   RTCancelKind CancelKind = CancelNoreq;
6182   if (CancelRegion == OMPD_parallel)
6183     CancelKind = CancelParallel;
6184   else if (CancelRegion == OMPD_for)
6185     CancelKind = CancelLoop;
6186   else if (CancelRegion == OMPD_sections)
6187     CancelKind = CancelSections;
6188   else {
6189     assert(CancelRegion == OMPD_taskgroup);
6190     CancelKind = CancelTaskgroup;
6191   }
6192   return CancelKind;
6193 }
6194 
/// Emits code for the 'cancellation point' directive: a call to
/// __kmpc_cancellationpoint whose result, when non-zero, branches out of the
/// enclosing construct through any active cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // Branch through any active cleanups to the cancellation destination of
      // the enclosing OpenMP region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6229 
/// Emits code for the 'cancel' directive: a (possibly 'if'-guarded) call to
/// __kmpc_cancel whose result, when non-zero, branches out of the enclosing
/// construct through any active cleanups.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Captured by value since the lambda may be emitted conditionally below.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // The cancel call is only emitted on the 'then' branch of the 'if'
      // clause; the 'else' branch is a no-op.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6271 
/// Emits the outlined function for a target region. The actual work is done
/// by emitTargetOutlinedFunctionHelper; this wrapper only asserts that a
/// parent-function name is available to build a unique entry name.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6280 
/// Generates the outlined function for a target region and, when it is an
/// offload entry, computes its region ID and registers it in the offload
/// entries table. \p OutlinedFn receives the generated function and
/// \p OutlinedFnID the identifier the runtime uses to launch the region.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Generate the outlined body with a fresh CodeGenFunction driven by the
  // target-region captured-statement info.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is just a unique global byte; only its address
    // matters.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6347 
6348 /// discard all CompoundStmts intervening between two constructs
6349 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
6350   while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
6351     Body = CS->body_front();
6352 
6353   return Body;
6354 }
6355 
6356 /// Emit the number of teams for a target directive.  Inspect the num_teams
6357 /// clause associated with a teams construct combined or closely nested
6358 /// with the target directive.
6359 ///
6360 /// Emit a team of size one for directives such as 'target parallel' that
6361 /// have no associated teams construct.
6362 ///
6363 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                               CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  // NOTE(review): OMPRuntime is currently unused in this helper; it appears
  // to be kept for interface symmetry with other emitters.
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  // If the target directive is combined with a teams directive:
  //   Return the value in the num_teams clause, if any.
  //   Otherwise, return 0 to denote the runtime default.
  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
    if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
      // Scope cleans up any temporaries created while evaluating the clause
      // expression.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
                                                 /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }

    // The default value is 0.
    return Bld.getInt32(0);
  }

  // If the target directive is combined with a parallel directive but not a
  // teams directive, start one team.
  if (isOpenMPParallelDirective(D.getDirectiveKind()))
    return Bld.getInt32(1);

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in a inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
        // Evaluate the clause expression inside an inlined-region context so
        // captured variables are resolved correctly.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                                 /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no num_teams clause we use
      // the default value 0.
      return Bld.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}
6423 
6424 /// Emit the number of threads for a target directive.  Inspect the
6425 /// thread_limit clause associated with a teams construct combined or closely
6426 /// nested with the target directive.
6427 ///
6428 /// Emit the num_threads clause for directives such as 'target parallel' that
6429 /// have no associated teams construct.
6430 ///
6431 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                 CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  // NOTE(review): OMPRuntime is currently unused in this helper; it appears
  // to be kept for interface symmetry with other emitters.
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  //
  // If the target directive is combined with a teams directive:
  //   Return the value in the thread_limit clause, if any.
  //
  // If the target directive is combined with a parallel directive:
  //   Return the value in the num_threads clause, if any.
  //
  // If both clauses are set, select the minimum of the two.
  //
  // If neither teams or parallel combined directives set the number of threads
  // in a team, return 0 to denote the runtime default.
  //
  // If this is not a teams directive return nullptr.

  if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
      isOpenMPParallelDirective(D.getDirectiveKind())) {
    llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
    llvm::Value *NumThreadsVal = nullptr;
    llvm::Value *ThreadLimitVal = nullptr;

    if (const auto *ThreadLimitClause =
            D.getSingleClause<OMPThreadLimitClause>()) {
      // Scope cleans up temporaries created while evaluating the clause.
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      llvm::Value *ThreadLimit =
          CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
                             /*IgnoreResultAssign*/ true);
      ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
    }

    if (const auto *NumThreadsClause =
            D.getSingleClause<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      llvm::Value *NumThreads =
          CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                             /*IgnoreResultAssign*/ true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
    }

    // Select the lesser of thread_limit and num_threads (signed comparison).
    if (NumThreadsVal)
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;

    // Set default value passed to the runtime if either teams or a target
    // parallel type directive is found but no clause is specified.
    if (!ThreadLimitVal)
      ThreadLimitVal = DefaultThreadLimitVal;

    return ThreadLimitVal;
  }

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in a inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
        // Evaluate the clause expression inside an inlined-region context so
        // captured variables are resolved correctly.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no thread_limit clause we
      // use the default value 0.
      return CGF.Builder.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}
6526 
6527 namespace {
6528 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6529 
6530 // Utility to handle information from clauses associated with a given
6531 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6532 // It provides a convenient interface to obtain the information and generate
6533 // code for that information.
6534 class MappableExprsHandler {
6535 public:
6536   /// Values for bit flags used to specify the mapping type for
6537   /// offloading.
6538   enum OpenMPOffloadMappingFlags : uint64_t {
6539     /// No flags
6540     OMP_MAP_NONE = 0x0,
6541     /// Allocate memory on the device and move data from host to device.
6542     OMP_MAP_TO = 0x01,
6543     /// Allocate memory on the device and move data from device to host.
6544     OMP_MAP_FROM = 0x02,
6545     /// Always perform the requested mapping action on the element, even
6546     /// if it was already mapped before.
6547     OMP_MAP_ALWAYS = 0x04,
6548     /// Delete the element from the device environment, ignoring the
6549     /// current reference count associated with the element.
6550     OMP_MAP_DELETE = 0x08,
6551     /// The element being mapped is a pointer-pointee pair; both the
6552     /// pointer and the pointee should be mapped.
6553     OMP_MAP_PTR_AND_OBJ = 0x10,
6554     /// This flags signals that the base address of an entry should be
6555     /// passed to the target kernel as an argument.
6556     OMP_MAP_TARGET_PARAM = 0x20,
6557     /// Signal that the runtime library has to return the device pointer
6558     /// in the current position for the data being mapped. Used when we have the
6559     /// use_device_ptr clause.
6560     OMP_MAP_RETURN_PARAM = 0x40,
6561     /// This flag signals that the reference being passed is a pointer to
6562     /// private data.
6563     OMP_MAP_PRIVATE = 0x80,
6564     /// Pass the element to the device by value.
6565     OMP_MAP_LITERAL = 0x100,
6566     /// Implicit map
6567     OMP_MAP_IMPLICIT = 0x200,
6568     /// The 16 MSBs of the flags indicate whether the entry is member of some
6569     /// struct/class.
6570     OMP_MAP_MEMBER_OF = 0xffff000000000000,
6571     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
6572   };
6573 
6574   /// Class that associates information with a base pointer to be passed to the
6575   /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Returns the stored base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Returns the device pointer declaration associated with this base
    /// pointer, if any.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associates a device pointer declaration with this base pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
6590 
6591   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
6592   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
6593   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6594 
6595   /// Map between a struct and the its lowest & highest elements which have been
6596   /// mapped.
6597   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6598   ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest-index mapped field of the struct (field index and its address).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest-index mapped field of the struct (field index and its address).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the whole struct.
    Address Base = Address::invalid();
  };
6606 
6607 private:
  /// Information gathered from a single map-clause component list: the
  /// expression components together with the map type/modifier, whether the
  /// device pointer should be returned for the entry, and whether the map
  /// was implicit.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType),
          MapTypeModifier(MapTypeModifier),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
6625 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred use_device_ptr component.
    const Expr *IE = nullptr;
    /// Declaration the device pointer refers to.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
6636 
  /// Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// Holds canonical declarations (inserted via getCanonicalDecl() in the
  /// constructor), so lookups must canonicalize their key as well.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
6652 
6653   llvm::Value *getExprTypeSize(const Expr *E) const {
6654     QualType ExprTy = E->getType().getCanonicalType();
6655 
6656     // Reference types are ignored for mapping purposes.
6657     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6658       ExprTy = RefTy->getPointeeType().getCanonicalType();
6659 
6660     // Given that an array section is considered a built-in type, we need to
6661     // do the calculation based on the length of the section instead of relying
6662     // on CGF.getTypeSize(E->getType()).
6663     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6664       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6665                             OAE->getBase()->IgnoreParenImpCasts())
6666                             .getCanonicalType();
6667 
6668       // If there is no length associated with the expression, that means we
6669       // are using the whole length of the base.
6670       if (!OAE->getLength() && OAE->getColonLoc().isValid())
6671         return CGF.getTypeSize(BaseTy);
6672 
6673       llvm::Value *ElemSize;
6674       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6675         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6676       } else {
6677         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6678         assert(ATy && "Expecting array type if not a pointer type.");
6679         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6680       }
6681 
6682       // If we don't have a length at this point, that is because we have an
6683       // array section with a single element.
6684       if (!OAE->getLength())
6685         return ElemSize;
6686 
6687       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6688       LengthVal =
6689           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6690       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6691     }
6692     return CGF.getTypeSize(ExprTy);
6693   }
6694 
6695   /// Return the corresponding bits for a given map clause modifier. Add
6696   /// a flag marking the map as a pointer if requested. Add a flag marking the
6697   /// map as the first one of a series of maps that relate to the same map
6698   /// expression.
6699   OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType,
6700                                            OpenMPMapClauseKind MapTypeModifier,
6701                                            bool IsImplicit, bool AddPtrFlag,
6702                                            bool AddIsTargetParamFlag) const {
6703     OpenMPOffloadMappingFlags Bits =
6704         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
6705     switch (MapType) {
6706     case OMPC_MAP_alloc:
6707     case OMPC_MAP_release:
6708       // alloc and release is the default behavior in the runtime library,  i.e.
6709       // if we don't pass any bits alloc/release that is what the runtime is
6710       // going to do. Therefore, we don't need to signal anything for these two
6711       // type modifiers.
6712       break;
6713     case OMPC_MAP_to:
6714       Bits |= OMP_MAP_TO;
6715       break;
6716     case OMPC_MAP_from:
6717       Bits |= OMP_MAP_FROM;
6718       break;
6719     case OMPC_MAP_tofrom:
6720       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
6721       break;
6722     case OMPC_MAP_delete:
6723       Bits |= OMP_MAP_DELETE;
6724       break;
6725     case OMPC_MAP_always:
6726     case OMPC_MAP_unknown:
6727       llvm_unreachable("Unexpected map type!");
6728     }
6729     if (AddPtrFlag)
6730       Bits |= OMP_MAP_PTR_AND_OBJ;
6731     if (AddIsTargetParamFlag)
6732       Bits |= OMP_MAP_TARGET_PARAM;
6733     if (MapTypeModifier == OMPC_MAP_always)
6734       Bits |= OMP_MAP_ALWAYS;
6735     return Bits;
6736   }
6737 
6738   /// Return true if the provided expression is a final array section. A
6739   /// final array section, is one whose length can't be proved to be one.
6740   bool isFinalArraySectionExpression(const Expr *E) const {
6741     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6742 
6743     // It is not an array section and therefore not a unity-size one.
6744     if (!OASE)
6745       return false;
6746 
6747     // An array section with no colon always refer to a single element.
6748     if (OASE->getColonLoc().isInvalid())
6749       return false;
6750 
6751     const Expr *Length = OASE->getLength();
6752 
6753     // If we don't have a length we have to check if the array has size 1
6754     // for this dimension. Also, we should always expect a length if the
6755     // base type is pointer.
6756     if (!Length) {
6757       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6758                              OASE->getBase()->IgnoreParenImpCasts())
6759                              .getCanonicalType();
6760       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6761         return ATy->getSize().getSExtValue() != 1;
6762       // If we don't have a constant dimension length, we have to consider
6763       // the current section as having any size, so it is not necessarily
6764       // unitary. If it happen to be unity size, that's user fault.
6765       return true;
6766     }
6767 
6768     // Check if the length evaluates to 1.
6769     llvm::APSInt ConstLength;
6770     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
6771       return true; // Can have more that size 1.
6772 
6773     return ConstLength.getSExtValue() != 1;
6774   }
6775 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// \param PartialStruct [out] filled with the lowest/highest mapped members
  ///        and the struct base when the components map members of a struct,
  ///        so the caller can later emit a combined entry for it.
  /// \param OverlappedElements component lists of other maps that overlap this
  ///        one; when non-empty, the non-overlapped pieces of the base element
  ///        are emitted individually instead of one contiguous entry.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();

    if (isa<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
          if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
            // "declare target link" variables are mapped through the runtime
            // link pointer rather than through the host variable itself.
            IsLink = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
          }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1, CharUnits::One());
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapTypeModifier, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(Size);
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1,
                                            CGF.getPointerSize());
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One())
                      .getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(Size);
          Types.push_back(Flags);
          // The overlapped-elements path consumes the rest of the component
          // list, so we are done with this expression.
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(Size);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapTypeModifier, IsImplicit,
              !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          // NOTE(review): this assumes the member declaration is always a
          // FieldDecl; FD would be null for e.g. a static data member —
          // confirm that such members cannot reach this point.
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7196 
7197   /// Return the adjusted map modifiers if the declaration a capture refers to
7198   /// appears in a first-private clause. This is expected to be used only with
7199   /// directives that start with 'target'.
7200   MappableExprsHandler::OpenMPOffloadMappingFlags
7201   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7202     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7203 
7204     // A first private variable captured by reference will use only the
7205     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7206     // declaration is known as first-private in this handler.
7207     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
7208       return MappableExprsHandler::OMP_MAP_PRIVATE |
7209              MappableExprsHandler::OMP_MAP_TO;
7210     return MappableExprsHandler::OMP_MAP_TO |
7211            MappableExprsHandler::OMP_MAP_FROM;
7212   }
7213 
7214   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7215     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7216     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7217                                                   << 48);
7218   }
7219 
7220   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7221                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7222     // If the entry is PTR_AND_OBJ but has not been marked with the special
7223     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7224     // marked as MEMBER_OF.
7225     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7226         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7227       return;
7228 
7229     // Reset the placeholder value to prepare the flag for the assignment of the
7230     // proper MEMBER_OF value.
7231     Flags &= ~OMP_MAP_MEMBER_OF;
7232     Flags |= MemberOfFlag;
7233   }
7234 
7235   void getPlainLayout(const CXXRecordDecl *RD,
7236                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7237                       bool AsBase) const {
7238     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7239 
7240     llvm::StructType *St =
7241         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7242 
7243     unsigned NumElements = St->getNumElements();
7244     llvm::SmallVector<
7245         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7246         RecordLayout(NumElements);
7247 
7248     // Fill bases.
7249     for (const auto &I : RD->bases()) {
7250       if (I.isVirtual())
7251         continue;
7252       const auto *Base = I.getType()->getAsCXXRecordDecl();
7253       // Ignore empty bases.
7254       if (Base->isEmpty() || CGF.getContext()
7255                                  .getASTRecordLayout(Base)
7256                                  .getNonVirtualSize()
7257                                  .isZero())
7258         continue;
7259 
7260       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7261       RecordLayout[FieldIndex] = Base;
7262     }
7263     // Fill in virtual bases.
7264     for (const auto &I : RD->vbases()) {
7265       const auto *Base = I.getType()->getAsCXXRecordDecl();
7266       // Ignore empty bases.
7267       if (Base->isEmpty())
7268         continue;
7269       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7270       if (RecordLayout[FieldIndex])
7271         continue;
7272       RecordLayout[FieldIndex] = Base;
7273     }
7274     // Fill in all the fields.
7275     assert(!RD->isUnion() && "Unexpected union.");
7276     for (const auto *Field : RD->fields()) {
7277       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7278       // will fill in later.)
7279       if (!Field->isBitField()) {
7280         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7281         RecordLayout[FieldIndex] = Field;
7282       }
7283     }
7284     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7285              &Data : RecordLayout) {
7286       if (Data.isNull())
7287         continue;
7288       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7289         getPlainLayout(Base, Layout, /*AsBase=*/true);
7290       else
7291         Layout.push_back(Data.get<const FieldDecl *>());
7292     }
7293   }
7294 
7295 public:
7296   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7297       : CurDir(Dir), CGF(CGF) {
7298     // Extract firstprivate clause information.
7299     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7300       for (const auto *D : C->varlists())
7301         FirstPrivateDecls.insert(
7302             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7303     // Extract device pointer clause information.
7304     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7305       for (auto L : C->component_lists())
7306         DevPointersMap[L.first].push_back(L.second);
7307   }
7308 
7309   /// Generate code for the combined entry if we have a partially mapped struct
7310   /// and take care of the mapping flags of the arguments corresponding to
7311   /// individual struct members.
7312   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7313                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7314                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7315                          const StructRangeInfoTy &PartialStruct) const {
7316     // Base is the base of the struct
7317     BasePointers.push_back(PartialStruct.Base.getPointer());
7318     // Pointer is the address of the lowest element
7319     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7320     Pointers.push_back(LB);
7321     // Size is (addr of {highest+1} element) - (addr of lowest element)
7322     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7323     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7324     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7325     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7326     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7327     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
7328                                                   /*isSinged=*/false);
7329     Sizes.push_back(Size);
7330     // Map type is always TARGET_PARAM
7331     Types.push_back(OMP_MAP_TARGET_PARAM);
7332     // Remove TARGET_PARAM flag from the first element
7333     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7334 
7335     // All other current entries will be MEMBER_OF the combined entry
7336     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7337     // 0xFFFF in the MEMBER_OF field).
7338     OpenMPOffloadMappingFlags MemberOfFlag =
7339         getMemberOfFlag(BasePointers.size() - 1);
7340     for (auto &M : CurTypes)
7341       setCorrectMemberOfFlag(M, MemberOfFlag);
7342   }
7343 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Entries are keyed on the canonical declaration; component
    // lists rooted at 'this' are keyed on nullptr.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect component lists from map clauses; 'to' and 'from' clauses are
    // folded in with fixed OMPC_MAP_to / OMPC_MAP_from map types.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
        this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit a zero-size entry right away that only
          // asks the runtime to return the device pointer.
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary arrays for the current declaration only; they are appended
      // to the result arrays once all its component lists are processed.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
7522 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Captures of 'this' are keyed on a null declaration; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every component list from map clauses that mentions this
    // declaration, together with its map type, modifier and implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, OpenMPMapClauseKind, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifier(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Two lists overlap when, walking both from the base component, one list
    // is exhausted while matching the other component-for-component.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE(review): MapType/MapTypeModifier/IsImplicit are reused here as
        // scratch for the tie; only Components1 is read below, and the outer
        // loop re-assigns them before their next use.
        std::tie(Components1, MapType, MapTypeModifier, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Compute the in-memory field order of the record so overlapped
      // sections can be sorted by layout position below.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of the two component lists.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the first differing field in
            // the record layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapTypeModifier, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapTypeModifier, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
7696 
7697   /// Generate the base pointers, section pointers, sizes and map types
7698   /// associated with the declare target link variables.
7699   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
7700                                         MapValuesArrayTy &Pointers,
7701                                         MapValuesArrayTy &Sizes,
7702                                         MapFlagsArrayTy &Types) const {
7703     // Map other list items in the map clause which are not captured variables
7704     // but "declare target link" global variables.,
7705     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
7706       for (const auto &L : C->component_lists()) {
7707         if (!L.first)
7708           continue;
7709         const auto *VD = dyn_cast<VarDecl>(L.first);
7710         if (!VD)
7711           continue;
7712         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7713             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
7714         if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
7715           continue;
7716         StructRangeInfoTy PartialStruct;
7717         generateInfoForComponentList(
7718             C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
7719             Pointers, Sizes, Types, PartialStruct,
7720             /*IsFirstComponentList=*/true, C->isImplicit());
7721         assert(!PartialStruct.Base.isValid() &&
7722                "No partial structs for declare target link expected.");
7723       }
7724     }
7725   }
7726 
7727   /// Generate the default map information for a given capture \a CI,
7728   /// record field declaration \a RI and captured value \a CV.
7729   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
7730                               const FieldDecl &RI, llvm::Value *CV,
7731                               MapBaseValuesArrayTy &CurBasePointers,
7732                               MapValuesArrayTy &CurPointers,
7733                               MapValuesArrayTy &CurSizes,
7734                               MapFlagsArrayTy &CurMapTypes) const {
7735     // Do the default mapping.
7736     if (CI.capturesThis()) {
7737       CurBasePointers.push_back(CV);
7738       CurPointers.push_back(CV);
7739       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
7740       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
7741       // Default map type.
7742       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
7743     } else if (CI.capturesVariableByCopy()) {
7744       CurBasePointers.push_back(CV);
7745       CurPointers.push_back(CV);
7746       if (!RI.getType()->isAnyPointerType()) {
7747         // We have to signal to the runtime captures passed by value that are
7748         // not pointers.
7749         CurMapTypes.push_back(OMP_MAP_LITERAL);
7750         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
7751       } else {
7752         // Pointers are implicitly mapped with a zero size and no flags
7753         // (other than first map that is added for all implicit maps).
7754         CurMapTypes.push_back(OMP_MAP_NONE);
7755         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
7756       }
7757     } else {
7758       assert(CI.capturesVariable() && "Expected captured reference.");
7759       CurBasePointers.push_back(CV);
7760       CurPointers.push_back(CV);
7761 
7762       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
7763       QualType ElementType = PtrTy->getPointeeType();
7764       CurSizes.push_back(CGF.getTypeSize(ElementType));
7765       // The default map type for a scalar/complex type is 'to' because by
7766       // default the value doesn't have to be retrieved. For an aggregate
7767       // type, the default is 'tofrom'.
7768       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
7769     }
7770     // Every default map produces a single argument which is a target parameter.
7771     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
7772 
7773     // Add flag stating this is an implicit map.
7774     CurMapTypes.back() |= OMP_MAP_IMPLICIT;
7775   }
7776 };
7777 
/// Device IDs with reserved meanings for the offloading runtime library.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
7783 } // anonymous namespace
7784 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and pointers are always filled at runtime, so they live
    // in local temporary arrays.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if runtime-evaluated) size into
    // its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // If a device pointer must be returned to the caller, remember the
      // address where its base pointer was stored.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
7901 /// Emit the arguments to be passed to the runtime library based on the
7902 /// arrays of pointers, sizes and map types.
7903 static void emitOffloadingArraysArgument(
7904     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
7905     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
7906     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
7907   CodeGenModule &CGM = CGF.CGM;
7908   if (Info.NumberOfPtrs) {
7909     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7910         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7911         Info.BasePointersArray,
7912         /*Idx0=*/0, /*Idx1=*/0);
7913     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7914         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7915         Info.PointersArray,
7916         /*Idx0=*/0,
7917         /*Idx1=*/0);
7918     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7919         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
7920         /*Idx0=*/0, /*Idx1=*/0);
7921     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7922         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
7923         Info.MapTypesArray,
7924         /*Idx0=*/0,
7925         /*Idx1=*/0);
7926   } else {
7927     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7928     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7929     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
7930     MapTypesArrayArg =
7931         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
7932   }
7933 }
7934 
// Emit host-side code for a '#pragma omp target' directive: capture the
// region's variables, build the offloading argument arrays, invoke the
// __tgt_target*() runtime entry points, and fall back to calling the host
// version of the outlined region when offloading fails or no device binary
// is available.
//
// \param OutlinedFn   Host version of the outlined target region (required).
// \param OutlinedFnID Unique ID the runtime uses to identify the target
//                     region; null when the compilation has no offloading
//                     targets, in which case only the host path is emitted.
// \param IfCond       Expression of the 'if' clause, or null.
// \param Device       Expression of the 'device' clause, or null.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause forces the whole target invocation to be wrapped in a
  // task-based directive (see EmitOMPTargetTaskBasedDirective below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Emit the values captured by the target region into CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray are filled in by TargetThenGen below and read by
  // ThenGen, which may run in a different (task) function context.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // When wrapped in an outer task, the captured values may have been
      // emitted in a different function context; re-capture them here.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Same re-capture as in ThenGen: the task body is a new function.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);

    // Walk the captures together with the matching record fields and emitted
    // capture values (the three sequences are parallel).
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the emitted arrays so ThenGen can build the runtime call
    // arguments, possibly from within the generated task function.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8195 
// Recursively scan the statement \p S for OpenMP target execution directives
// and emit a device function for each target region found, using
// \p ParentName as the basis of the unique kernel name. Regions recorded in
// the offload entries table as not requiring an entry point are skipped.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple uniquely identifies the target region
    // entry across the host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device codegen entry point.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // The remaining directives are not target execution directives; reaching
    // one of them here indicates a bug, since RequiresDeviceCodegen above
    // only admits target execution directives.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Other OpenMP directives cannot themselves be target entry points, but
  // their associated statements may contain nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
8331 
8332 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
8333   // If emitting code for the host, we do not process FD here. Instead we do
8334   // the normal code generation.
8335   if (!CGM.getLangOpts().OpenMPIsDevice)
8336     return false;
8337 
8338   // Try to detect target regions in the function.
8339   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
8340   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
8341     scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD));
8342 
8343   // Do not to emit function if it is not marked as declare target.
8344   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
8345          AlreadyEmittedTargetFunctions.count(VD->getCanonicalDecl()) == 0;
8346 }
8347 
8348 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
8349   if (!CGM.getLangOpts().OpenMPIsDevice)
8350     return false;
8351 
8352   // Check if there are Ctors/Dtors in this declaration and look for target
8353   // regions in it. We use the complete variant to produce the kernel name
8354   // mangling.
8355   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
8356   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
8357     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
8358       StringRef ParentName =
8359           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
8360       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
8361     }
8362     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
8363       StringRef ParentName =
8364           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
8365       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
8366     }
8367   }
8368 
8369   // Do not to emit variable if it is not marked as declare target.
8370   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8371       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
8372           cast<VarDecl>(GD.getDecl()));
8373   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
8374     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
8375     return true;
8376   }
8377   return false;
8378 }
8379 
8380 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
8381                                                    llvm::Constant *Addr) {
8382   if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8383           OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
8384     OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
8385     StringRef VarName;
8386     CharUnits VarSize;
8387     llvm::GlobalValue::LinkageTypes Linkage;
8388     switch (*Res) {
8389     case OMPDeclareTargetDeclAttr::MT_To:
8390       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
8391       VarName = CGM.getMangledName(VD);
8392       if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
8393         VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
8394         assert(!VarSize.isZero() && "Expected non-zero size of the variable");
8395       } else {
8396         VarSize = CharUnits::Zero();
8397       }
8398       Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
8399       // Temp solution to prevent optimizations of the internal variables.
8400       if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
8401         std::string RefName = getName({VarName, "ref"});
8402         if (!CGM.GetGlobalValue(RefName)) {
8403           llvm::Constant *AddrRef =
8404               getOrCreateInternalVariable(Addr->getType(), RefName);
8405           auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
8406           GVAddrRef->setConstant(/*Val=*/true);
8407           GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
8408           GVAddrRef->setInitializer(Addr);
8409           CGM.addCompilerUsedGlobal(GVAddrRef);
8410         }
8411       }
8412       break;
8413     case OMPDeclareTargetDeclAttr::MT_Link:
8414       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
8415       if (CGM.getLangOpts().OpenMPIsDevice) {
8416         VarName = Addr->getName();
8417         Addr = nullptr;
8418       } else {
8419         VarName = getAddrOfDeclareTargetLink(VD).getName();
8420         Addr =
8421             cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
8422       }
8423       VarSize = CGM.getPointerSize();
8424       Linkage = llvm::GlobalValue::WeakAnyLinkage;
8425       break;
8426     }
8427     OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
8428         VarName, Addr, VarSize, Flags, Linkage);
8429   }
8430 }
8431 
8432 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
8433   if (isa<FunctionDecl>(GD.getDecl()) ||
8434       isa<OMPDeclareReductionDecl>(GD.getDecl()))
8435     return emitTargetFunctions(GD);
8436 
8437   return emitTargetGlobalVariable(GD);
8438 }
8439 
8440 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
8441   for (const VarDecl *VD : DeferredGlobalVariables) {
8442     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8443         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8444     if (!Res)
8445       continue;
8446     if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
8447       CGM.EmitGlobal(VD);
8448     } else {
8449       assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
8450              "Expected to or link clauses.");
8451       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
8452     }
8453   }
8454 }
8455 
8456 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
8457     CodeGenModule &CGM)
8458     : CGM(CGM) {
8459   if (CGM.getLangOpts().OpenMPIsDevice) {
8460     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
8461     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
8462   }
8463 }
8464 
8465 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
8466   if (CGM.getLangOpts().OpenMPIsDevice)
8467     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
8468 }
8469 
// Track the function \p GD during device compilation. Judging from the
// count/insert logic below, the return value appears to mean "no (further)
// emission of this function is needed" — TODO(review): confirm against the
// callers in CodeGenModule.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or tracking disabled (see DisableAutoDeclareTargetRAII):
  // nothing to record.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  const FunctionDecl *FD = D->getCanonicalDecl();
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
      // A non-declaration llvm::Function in the module means a definition was
      // already emitted for this mangled name.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record the function; the insert result distinguishes first-time (emit)
  // from already-seen (skip).
  return !AlreadyEmittedTargetFunctions.insert(FD).second;
}
8490 
8491 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
8492   // If we have offloading in the current module, we need to emit the entries
8493   // now and register the offloading descriptor.
8494   createOffloadEntriesAndInfoMetadata();
8495 
8496   // Create and register the offloading binary descriptors. This is the main
8497   // entity that captures all the information about offloading in the current
8498   // compilation unit.
8499   return createOffloadingBinaryDescriptorRegistration();
8500 }
8501 
8502 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
8503                                     const OMPExecutableDirective &D,
8504                                     SourceLocation Loc,
8505                                     llvm::Value *OutlinedFn,
8506                                     ArrayRef<llvm::Value *> CapturedVars) {
8507   if (!CGF.HaveInsertPoint())
8508     return;
8509 
8510   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
8511   CodeGenFunction::RunCleanupsScope Scope(CGF);
8512 
8513   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
8514   llvm::Value *Args[] = {
8515       RTLoc,
8516       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
8517       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
8518   llvm::SmallVector<llvm::Value *, 16> RealArgs;
8519   RealArgs.append(std::begin(Args), std::end(Args));
8520   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
8521 
8522   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
8523   CGF.EmitRuntimeCall(RTLFn, RealArgs);
8524 }
8525 
8526 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
8527                                          const Expr *NumTeams,
8528                                          const Expr *ThreadLimit,
8529                                          SourceLocation Loc) {
8530   if (!CGF.HaveInsertPoint())
8531     return;
8532 
8533   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
8534 
8535   llvm::Value *NumTeamsVal =
8536       NumTeams
8537           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
8538                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
8539           : CGF.Builder.getInt32(0);
8540 
8541   llvm::Value *ThreadLimitVal =
8542       ThreadLimit
8543           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
8544                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
8545           : CGF.Builder.getInt32(0);
8546 
8547   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
8548   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
8549                                      ThreadLimitVal};
8550   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
8551                       PushNumTeamsArgs);
8552 }
8553 
/// Emit the runtime calls that bracket a 'target data' region:
/// __tgt_target_data_begin on entry and __tgt_target_data_end on exit,
/// guarded by the 'if' clause when present. The region body \p CodeGen is
/// normally emitted between the two calls; when device-pointer
/// privatization is required (Info.CaptureDeviceAddrMap non-empty) the body
/// is instead duplicated into the then/else branches of the 'if' guard.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. Without a 'device' clause, pass
    // OMP_DEVICEID_UNDEF so the runtime selects the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    // Info was filled by BeginThenGen; reuse the same arrays for the
    // matching end call.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
8680 
/// Emit a standalone target data directive -- 'target enter data',
/// 'target exit data' or 'target update' -- as a single call to the
/// matching __tgt_target_data_* runtime function (the *_nowait flavor when a
/// 'nowait' clause is present), guarded by the 'if' clause if any. With a
/// 'depend' clause the runtime call is issued from a target task.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and
  // then read by ThenGen, possibly from within a task region.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; OMP_DEVICEID_UNDEF means the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds were excluded by the assertion above;
    // they are listed explicitly so the switch stays fully covered.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays into the enclosing scope so ThenGen can read them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a 'depend' clause the runtime call must happen inside a task.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
8831 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// LinearWithVarStride - linear with a stride given by another parameter
  /// (StrideOrArg then holds that parameter's position);
  /// Linear - linear with a constant stride (held in StrideOrArg);
  /// Uniform - same value across all SIMD lanes;
  /// Vector - varying per lane (the default).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Every parameter is treated as 'vector' unless a clause says otherwise.
    ParamKindTy Kind = Vector;
    // Linear stride, or the stride parameter's position for
    // LinearWithVarStride.
    llvm::APSInt StrideOrArg;
    // Alignment from the 'aligned' clause; unset (zero) when none applies.
    llvm::APSInt Alignment;
  };
} // namespace
8842 
8843 static unsigned evaluateCDTSize(const FunctionDecl *FD,
8844                                 ArrayRef<ParamAttrTy> ParamAttrs) {
8845   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
8846   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
8847   // of that clause. The VLEN value must be power of 2.
8848   // In other case the notion of the function`s "characteristic data type" (CDT)
8849   // is used to compute the vector length.
8850   // CDT is defined in the following order:
8851   //   a) For non-void function, the CDT is the return type.
8852   //   b) If the function has any non-uniform, non-linear parameters, then the
8853   //   CDT is the type of the first such parameter.
8854   //   c) If the CDT determined by a) or b) above is struct, union, or class
8855   //   type which is pass-by-value (except for the type that maps to the
8856   //   built-in complex data type), the characteristic data type is int.
8857   //   d) If none of the above three cases is applicable, the CDT is int.
8858   // The VLEN is then determined based on the CDT and the size of vector
8859   // register of that ISA for which current vector version is generated. The
8860   // VLEN is computed using the formula below:
8861   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
8862   // where vector register size specified in section 3.2.1 Registers and the
8863   // Stack Frame of original AMD64 ABI document.
8864   QualType RetType = FD->getReturnType();
8865   if (RetType.isNull())
8866     return 0;
8867   ASTContext &C = FD->getASTContext();
8868   QualType CDT;
8869   if (!RetType.isNull() && !RetType->isVoidType()) {
8870     CDT = RetType;
8871   } else {
8872     unsigned Offset = 0;
8873     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
8874       if (ParamAttrs[Offset].Kind == Vector)
8875         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
8876       ++Offset;
8877     }
8878     if (CDT.isNull()) {
8879       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
8880         if (ParamAttrs[I + Offset].Kind == Vector) {
8881           CDT = FD->getParamDecl(I)->getType();
8882           break;
8883         }
8884       }
8885     }
8886   }
8887   if (CDT.isNull())
8888     CDT = C.IntTy;
8889   CDT = CDT->getCanonicalTypeUnqualified();
8890   if (CDT->isRecordType() || CDT->isUnionType())
8891     CDT = C.IntTy;
8892   return C.getTypeSize(CDT);
8893 }
8894 
8895 static void
8896 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
8897                            const llvm::APSInt &VLENVal,
8898                            ArrayRef<ParamAttrTy> ParamAttrs,
8899                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
8900   struct ISADataTy {
8901     char ISA;
8902     unsigned VecRegSize;
8903   };
8904   ISADataTy ISAData[] = {
8905       {
8906           'b', 128
8907       }, // SSE
8908       {
8909           'c', 256
8910       }, // AVX
8911       {
8912           'd', 256
8913       }, // AVX2
8914       {
8915           'e', 512
8916       }, // AVX512
8917   };
8918   llvm::SmallVector<char, 2> Masked;
8919   switch (State) {
8920   case OMPDeclareSimdDeclAttr::BS_Undefined:
8921     Masked.push_back('N');
8922     Masked.push_back('M');
8923     break;
8924   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
8925     Masked.push_back('N');
8926     break;
8927   case OMPDeclareSimdDeclAttr::BS_Inbranch:
8928     Masked.push_back('M');
8929     break;
8930   }
8931   for (char Mask : Masked) {
8932     for (const ISADataTy &Data : ISAData) {
8933       SmallString<256> Buffer;
8934       llvm::raw_svector_ostream Out(Buffer);
8935       Out << "_ZGV" << Data.ISA << Mask;
8936       if (!VLENVal) {
8937         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
8938                                          evaluateCDTSize(FD, ParamAttrs));
8939       } else {
8940         Out << VLENVal;
8941       }
8942       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
8943         switch (ParamAttr.Kind){
8944         case LinearWithVarStride:
8945           Out << 's' << ParamAttr.StrideOrArg;
8946           break;
8947         case Linear:
8948           Out << 'l';
8949           if (!!ParamAttr.StrideOrArg)
8950             Out << ParamAttr.StrideOrArg;
8951           break;
8952         case Uniform:
8953           Out << 'u';
8954           break;
8955         case Vector:
8956           Out << 'v';
8957           break;
8958         }
8959         if (!!ParamAttr.Alignment)
8960           Out << 'a' << ParamAttr.Alignment;
8961       }
8962       Out << '_' << Fn->getName();
8963       Fn->addFnAttr(Out.str());
8964     }
8965   }
8966 }
8967 
8968 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
8969                                               llvm::Function *Fn) {
8970   ASTContext &C = CGM.getContext();
8971   FD = FD->getMostRecentDecl();
8972   // Map params to their positions in function decl.
8973   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
8974   if (isa<CXXMethodDecl>(FD))
8975     ParamPositions.try_emplace(FD, 0);
8976   unsigned ParamPos = ParamPositions.size();
8977   for (const ParmVarDecl *P : FD->parameters()) {
8978     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
8979     ++ParamPos;
8980   }
8981   while (FD) {
8982     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
8983       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
8984       // Mark uniform parameters.
8985       for (const Expr *E : Attr->uniforms()) {
8986         E = E->IgnoreParenImpCasts();
8987         unsigned Pos;
8988         if (isa<CXXThisExpr>(E)) {
8989           Pos = ParamPositions[FD];
8990         } else {
8991           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8992                                 ->getCanonicalDecl();
8993           Pos = ParamPositions[PVD];
8994         }
8995         ParamAttrs[Pos].Kind = Uniform;
8996       }
8997       // Get alignment info.
8998       auto NI = Attr->alignments_begin();
8999       for (const Expr *E : Attr->aligneds()) {
9000         E = E->IgnoreParenImpCasts();
9001         unsigned Pos;
9002         QualType ParmTy;
9003         if (isa<CXXThisExpr>(E)) {
9004           Pos = ParamPositions[FD];
9005           ParmTy = E->getType();
9006         } else {
9007           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9008                                 ->getCanonicalDecl();
9009           Pos = ParamPositions[PVD];
9010           ParmTy = PVD->getType();
9011         }
9012         ParamAttrs[Pos].Alignment =
9013             (*NI)
9014                 ? (*NI)->EvaluateKnownConstInt(C)
9015                 : llvm::APSInt::getUnsigned(
9016                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
9017                           .getQuantity());
9018         ++NI;
9019       }
9020       // Mark linear parameters.
9021       auto SI = Attr->steps_begin();
9022       auto MI = Attr->modifiers_begin();
9023       for (const Expr *E : Attr->linears()) {
9024         E = E->IgnoreParenImpCasts();
9025         unsigned Pos;
9026         if (isa<CXXThisExpr>(E)) {
9027           Pos = ParamPositions[FD];
9028         } else {
9029           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
9030                                 ->getCanonicalDecl();
9031           Pos = ParamPositions[PVD];
9032         }
9033         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
9034         ParamAttr.Kind = Linear;
9035         if (*SI) {
9036           if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
9037                                     Expr::SE_AllowSideEffects)) {
9038             if (const auto *DRE =
9039                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
9040               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
9041                 ParamAttr.Kind = LinearWithVarStride;
9042                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
9043                     ParamPositions[StridePVD->getCanonicalDecl()]);
9044               }
9045             }
9046           }
9047         }
9048         ++SI;
9049         ++MI;
9050       }
9051       llvm::APSInt VLENVal;
9052       if (const Expr *VLEN = Attr->getSimdlen())
9053         VLENVal = VLEN->EvaluateKnownConstInt(C);
9054       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
9055       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
9056           CGM.getTriple().getArch() == llvm::Triple::x86_64)
9057         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
9058     }
9059     FD = FD->getPreviousDecl();
9060   }
9061 }
9062 
9063 namespace {
9064 /// Cleanup action for doacross support.
9065 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
9066 public:
9067   static const int DoacrossFinArgs = 2;
9068 
9069 private:
9070   llvm::Value *RTLFn;
9071   llvm::Value *Args[DoacrossFinArgs];
9072 
9073 public:
9074   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
9075       : RTLFn(RTLFn) {
9076     assert(CallArgs.size() == DoacrossFinArgs);
9077     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
9078   }
9079   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
9080     if (!CGF.HaveInsertPoint())
9081       return;
9082     CGF.EmitRuntimeCall(RTLFn, Args);
9083   }
9084 };
9085 } // namespace
9086 
/// Emit initialization for a doacross loop nest: build an array of kmp_dim
/// structures (one per loop) holding lower bound, upper bound and stride,
/// call __kmpc_doacross_init with it, and push an EH cleanup that calls
/// __kmpc_doacross_fini when the region is exited.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build and cache the kmp_dim record type on first use.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero-initialize the array; in particular dims.lower stays 0 for every
  // loop, so only upper and stride are stored explicitly below.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal =
        CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
                               DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
                           KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder
              .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy))
              .getPointer(),
          CGM.VoidPtrTy)};

  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup so __kmpc_doacross_fini runs on both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
9160 
9161 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
9162                                           const OMPDependClause *C) {
9163   QualType Int64Ty =
9164       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9165   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
9166   QualType ArrayTy = CGM.getContext().getConstantArrayType(
9167       Int64Ty, Size, ArrayType::Normal, 0);
9168   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
9169   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
9170     const Expr *CounterVal = C->getLoopData(I);
9171     assert(CounterVal);
9172     llvm::Value *CntVal = CGF.EmitScalarConversion(
9173         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
9174         CounterVal->getExprLoc());
9175     CGF.EmitStoreOfScalar(
9176         CntVal,
9177         CGF.Builder.CreateConstArrayGEP(
9178             CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
9179         /*Volatile=*/false, Int64Ty);
9180   }
9181   llvm::Value *Args[] = {
9182       emitUpdateLocation(CGF, C->getBeginLoc()),
9183       getThreadID(CGF, C->getBeginLoc()),
9184       CGF.Builder
9185           .CreateConstArrayGEP(CntAddr, 0,
9186                                CGM.getContext().getTypeSizeInChars(Int64Ty))
9187           .getPointer()};
9188   llvm::Value *RTLFn;
9189   if (C->getDependencyKind() == OMPC_DEPEND_source) {
9190     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
9191   } else {
9192     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
9193     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
9194   }
9195   CGF.EmitRuntimeCall(RTLFn, Args);
9196 }
9197 
9198 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
9199                                llvm::Value *Callee,
9200                                ArrayRef<llvm::Value *> Args) const {
9201   assert(Loc.isValid() && "Outlined function call location must be valid.");
9202   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
9203 
9204   if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
9205     if (Fn->doesNotThrow()) {
9206       CGF.EmitNounwindRuntimeCall(Fn, Args);
9207       return;
9208     }
9209   }
9210   CGF.EmitRuntimeCall(Callee, Args);
9211 }
9212 
/// Emit a call to an outlined OpenMP region function; this default
/// implementation simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
9218 
/// Default implementation: the address of the native parameter is simply its
/// local variable's address; TargetParam is ignored here.
/// NOTE(review): presumably specialized by targets whose native and target
/// parameter representations differ -- confirm against the subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
9224 
/// Default implementation performs no special allocation for local variables
/// and signals that by returning an invalid Address.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  return Address::invalid();
}
9229 
9230 llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
9231     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
9232     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
9233   llvm_unreachable("Not supported in SIMD-only mode");
9234 }
9235 
9236 llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
9237     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
9238     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
9239   llvm_unreachable("Not supported in SIMD-only mode");
9240 }
9241 
9242 llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
9243     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
9244     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
9245     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
9246     bool Tied, unsigned &NumberOfParts) {
9247   llvm_unreachable("Not supported in SIMD-only mode");
9248 }
9249 
9250 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
9251                                            SourceLocation Loc,
9252                                            llvm::Value *OutlinedFn,
9253                                            ArrayRef<llvm::Value *> CapturedVars,
9254                                            const Expr *IfCond) {
9255   llvm_unreachable("Not supported in SIMD-only mode");
9256 }
9257 
9258 void CGOpenMPSIMDRuntime::emitCriticalRegion(
9259     CodeGenFunction &CGF, StringRef CriticalName,
9260     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
9261     const Expr *Hint) {
9262   llvm_unreachable("Not supported in SIMD-only mode");
9263 }
9264 
9265 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
9266                                            const RegionCodeGenTy &MasterOpGen,
9267                                            SourceLocation Loc) {
9268   llvm_unreachable("Not supported in SIMD-only mode");
9269 }
9270 
9271 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
9272                                             SourceLocation Loc) {
9273   llvm_unreachable("Not supported in SIMD-only mode");
9274 }
9275 
9276 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
9277     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
9278     SourceLocation Loc) {
9279   llvm_unreachable("Not supported in SIMD-only mode");
9280 }
9281 
9282 void CGOpenMPSIMDRuntime::emitSingleRegion(
9283     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
9284     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
9285     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
9286     ArrayRef<const Expr *> AssignmentOps) {
9287   llvm_unreachable("Not supported in SIMD-only mode");
9288 }
9289 
9290 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
9291                                             const RegionCodeGenTy &OrderedOpGen,
9292                                             SourceLocation Loc,
9293                                             bool IsThreads) {
9294   llvm_unreachable("Not supported in SIMD-only mode");
9295 }
9296 
9297 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
9298                                           SourceLocation Loc,
9299                                           OpenMPDirectiveKind Kind,
9300                                           bool EmitChecks,
9301                                           bool ForceSimpleCall) {
9302   llvm_unreachable("Not supported in SIMD-only mode");
9303 }
9304 
9305 void CGOpenMPSIMDRuntime::emitForDispatchInit(
9306     CodeGenFunction &CGF, SourceLocation Loc,
9307     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
9308     bool Ordered, const DispatchRTInput &DispatchValues) {
9309   llvm_unreachable("Not supported in SIMD-only mode");
9310 }
9311 
9312 void CGOpenMPSIMDRuntime::emitForStaticInit(
9313     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
9314     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
9315   llvm_unreachable("Not supported in SIMD-only mode");
9316 }
9317 
9318 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
9319     CodeGenFunction &CGF, SourceLocation Loc,
9320     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
9321   llvm_unreachable("Not supported in SIMD-only mode");
9322 }
9323 
// End-of-iteration notification for ordered loops is a runtime call;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9330 
// Static worksharing-loop finalization is a runtime call; unreachable
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9336 
// Fetching the next dynamically-scheduled chunk is a runtime call;
// unreachable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9344 
// The 'num_threads' clause is meaningless without a threading runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9350 
// The 'proc_bind' clause controls thread affinity via the runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9356 
// Threadprivate storage lookup requires the runtime's TLS machinery;
// unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9363 
// Emitting a threadprivate variable definition (ctor/dtor registration)
// requires the runtime; unreachable in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9369 
// Artificial (compiler-generated) threadprivate storage also needs the
// runtime; unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9374 
// The 'flush' directive maps to a runtime memory-fence call;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9380 
// Task creation/scheduling is entirely runtime-driven; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9389 
// 'taskloop' codegen is runtime-driven like plain tasks; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9396 
// The only member of CGOpenMPSIMDRuntime with a real implementation:
// simple (simd) reductions need no runtime calls, so delegate to the
// base-class codegen. Any other reduction flavor is a front-end bug
// in SIMD-only mode, hence the assert.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
9405 
// Task reductions require runtime bookkeeping; unreachable in SIMD-only
// mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9411 
// Fixups for task-reduction items are runtime-dependent; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9418 
// Looking up a per-task reduction item is a runtime call; unreachable
// in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9425 
// 'taskwait' maps to a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9430 
// 'cancellation point' requires runtime cancellation support;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9436 
// 'cancel' requires runtime cancellation support; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9442 
// Device offloading ('target' outlining) is unsupported without the
// full runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9449 
// Launching a 'target' region is a runtime offload call; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Value *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond, const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9457 
// Device-side emission of target functions is unsupported; unreachable
// in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9461 
// Device-side emission of target global variables is unsupported;
// unreachable in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9465 
// Deliberately a no-op (not unreachable): always report that the global
// needs no target-specific handling, so normal codegen proceeds.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
9469 
// Deliberately a no-op (not unreachable): no offload-registration
// function is emitted in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
9473 
// 'teams' constructs require the offload runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Value *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9481 
// 'num_teams'/'thread_limit' clauses configure the offload runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9488 
// 'target data' regions map/unmap device memory via the runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9494 
// Standalone data-motion directives (enter/exit data, update) are
// runtime calls; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9500 
// Doacross (ordered depend) loop initialization is a runtime call;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9506 
// Doacross wait/post for an 'ordered depend' clause is a runtime call;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9511 
// Parameter translation is only needed for device-specific outlining;
// unreachable in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9517 
// Mapping a native parameter to its target counterpart is only needed
// for device-specific outlining; unreachable in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
9524 
9525