1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "clang/CodeGen/ConstantInitBuilder.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/BitmaskEnum.h"
24 #include "llvm/Bitcode/BitcodeReader.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <cassert>
32 
33 using namespace clang;
34 using namespace CodeGen;
35 
36 namespace {
37 /// \brief Base class for handling code generation inside OpenMP regions.
38 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
39 public:
40   /// \brief Kinds of OpenMP regions used in codegen.
41   enum CGOpenMPRegionKind {
42     /// \brief Region with outlined function for standalone 'parallel'
43     /// directive.
44     ParallelOutlinedRegion,
45     /// \brief Region with outlined function for standalone 'task' directive.
46     TaskOutlinedRegion,
47     /// \brief Region for constructs that do not require function outlining,
48     /// like 'for', 'sections', 'atomic' etc. directives.
49     InlinedRegion,
50     /// \brief Region with outlined function for standalone 'target' directive.
51     TargetRegion,
52   };
53 
54   CGOpenMPRegionInfo(const CapturedStmt &CS,
55                      const CGOpenMPRegionKind RegionKind,
56                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57                      bool HasCancel)
58       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
59         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
60 
61   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
62                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
63                      bool HasCancel)
64       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
65         Kind(Kind), HasCancel(HasCancel) {}
66 
67   /// \brief Get a variable or parameter for storing global thread id
68   /// inside OpenMP construct.
69   virtual const VarDecl *getThreadIDVariable() const = 0;
70 
71   /// \brief Emit the captured statement body.
72   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
73 
74   /// \brief Get an LValue for the current ThreadID variable.
75   /// \return LValue for thread id variable. This LValue always has type int32*.
76   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
77 
78   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
79 
80   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
81 
82   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
83 
84   bool hasCancel() const { return HasCancel; }
85 
86   static bool classof(const CGCapturedStmtInfo *Info) {
87     return Info->getKind() == CR_OpenMP;
88   }
89 
90   ~CGOpenMPRegionInfo() override = default;
91 
92 protected:
93   CGOpenMPRegionKind RegionKind;
94   RegionCodeGenTy CodeGen;
95   OpenMPDirectiveKind Kind;
96   bool HasCancel;
97 };
98 
99 /// \brief API for captured statement code generation in OpenMP constructs.
100 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
101 public:
102   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
103                              const RegionCodeGenTy &CodeGen,
104                              OpenMPDirectiveKind Kind, bool HasCancel,
105                              StringRef HelperName)
106       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
107                            HasCancel),
108         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
109     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
110   }
111 
112   /// \brief Get a variable or parameter for storing global thread id
113   /// inside OpenMP construct.
114   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
115 
116   /// \brief Get the name of the capture helper.
117   StringRef getHelperName() const override { return HelperName; }
118 
119   static bool classof(const CGCapturedStmtInfo *Info) {
120     return CGOpenMPRegionInfo::classof(Info) &&
121            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
122                ParallelOutlinedRegion;
123   }
124 
125 private:
126   /// \brief A variable or parameter storing global thread id for OpenMP
127   /// constructs.
128   const VarDecl *ThreadIDVar;
129   StringRef HelperName;
130 };
131 
132 /// \brief API for captured statement code generation in OpenMP constructs.
133 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
134 public:
135   class UntiedTaskActionTy final : public PrePostActionTy {
136     bool Untied;
137     const VarDecl *PartIDVar;
138     const RegionCodeGenTy UntiedCodeGen;
139     llvm::SwitchInst *UntiedSwitch = nullptr;
140 
141   public:
142     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
143                        const RegionCodeGenTy &UntiedCodeGen)
144         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
145     void Enter(CodeGenFunction &CGF) override {
146       if (Untied) {
147         // Emit task switching point.
148         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
149             CGF.GetAddrOfLocalVar(PartIDVar),
150             PartIDVar->getType()->castAs<PointerType>());
151         llvm::Value *Res =
152             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
153         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
154         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
155         CGF.EmitBlock(DoneBB);
156         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
157         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
158         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
159                               CGF.Builder.GetInsertBlock());
160         emitUntiedSwitch(CGF);
161       }
162     }
163     void emitUntiedSwitch(CodeGenFunction &CGF) const {
164       if (Untied) {
165         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
166             CGF.GetAddrOfLocalVar(PartIDVar),
167             PartIDVar->getType()->castAs<PointerType>());
168         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
169                               PartIdLVal);
170         UntiedCodeGen(CGF);
171         CodeGenFunction::JumpDest CurPoint =
172             CGF.getJumpDestInCurrentScope(".untied.next.");
173         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
174         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
175         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
176                               CGF.Builder.GetInsertBlock());
177         CGF.EmitBranchThroughCleanup(CurPoint);
178         CGF.EmitBlock(CurPoint.getBlock());
179       }
180     }
181     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
182   };
183   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
184                                  const VarDecl *ThreadIDVar,
185                                  const RegionCodeGenTy &CodeGen,
186                                  OpenMPDirectiveKind Kind, bool HasCancel,
187                                  const UntiedTaskActionTy &Action)
188       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
189         ThreadIDVar(ThreadIDVar), Action(Action) {
190     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
191   }
192 
193   /// \brief Get a variable or parameter for storing global thread id
194   /// inside OpenMP construct.
195   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
196 
197   /// \brief Get an LValue for the current ThreadID variable.
198   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
199 
200   /// \brief Get the name of the capture helper.
201   StringRef getHelperName() const override { return ".omp_outlined."; }
202 
203   void emitUntiedSwitch(CodeGenFunction &CGF) override {
204     Action.emitUntiedSwitch(CGF);
205   }
206 
207   static bool classof(const CGCapturedStmtInfo *Info) {
208     return CGOpenMPRegionInfo::classof(Info) &&
209            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
210                TaskOutlinedRegion;
211   }
212 
213 private:
214   /// \brief A variable or parameter storing global thread id for OpenMP
215   /// constructs.
216   const VarDecl *ThreadIDVar;
217   /// Action for emitting code for untied tasks.
218   const UntiedTaskActionTy &Action;
219 };
220 
221 /// \brief API for inlined captured statement code generation in OpenMP
222 /// constructs.
223 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
224 public:
225   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
226                             const RegionCodeGenTy &CodeGen,
227                             OpenMPDirectiveKind Kind, bool HasCancel)
228       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
229         OldCSI(OldCSI),
230         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
231 
232   // \brief Retrieve the value of the context parameter.
233   llvm::Value *getContextValue() const override {
234     if (OuterRegionInfo)
235       return OuterRegionInfo->getContextValue();
236     llvm_unreachable("No context value for inlined OpenMP region");
237   }
238 
239   void setContextValue(llvm::Value *V) override {
240     if (OuterRegionInfo) {
241       OuterRegionInfo->setContextValue(V);
242       return;
243     }
244     llvm_unreachable("No context value for inlined OpenMP region");
245   }
246 
247   /// \brief Lookup the captured field decl for a variable.
248   const FieldDecl *lookup(const VarDecl *VD) const override {
249     if (OuterRegionInfo)
250       return OuterRegionInfo->lookup(VD);
251     // If there is no outer outlined region,no need to lookup in a list of
252     // captured variables, we can use the original one.
253     return nullptr;
254   }
255 
256   FieldDecl *getThisFieldDecl() const override {
257     if (OuterRegionInfo)
258       return OuterRegionInfo->getThisFieldDecl();
259     return nullptr;
260   }
261 
262   /// \brief Get a variable or parameter for storing global thread id
263   /// inside OpenMP construct.
264   const VarDecl *getThreadIDVariable() const override {
265     if (OuterRegionInfo)
266       return OuterRegionInfo->getThreadIDVariable();
267     return nullptr;
268   }
269 
270   /// \brief Get an LValue for the current ThreadID variable.
271   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
272     if (OuterRegionInfo)
273       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
274     llvm_unreachable("No LValue for inlined OpenMP construct");
275   }
276 
277   /// \brief Get the name of the capture helper.
278   StringRef getHelperName() const override {
279     if (auto *OuterRegionInfo = getOldCSI())
280       return OuterRegionInfo->getHelperName();
281     llvm_unreachable("No helper name for inlined OpenMP construct");
282   }
283 
284   void emitUntiedSwitch(CodeGenFunction &CGF) override {
285     if (OuterRegionInfo)
286       OuterRegionInfo->emitUntiedSwitch(CGF);
287   }
288 
289   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
290 
291   static bool classof(const CGCapturedStmtInfo *Info) {
292     return CGOpenMPRegionInfo::classof(Info) &&
293            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
294   }
295 
296   ~CGOpenMPInlinedRegionInfo() override = default;
297 
298 private:
299   /// \brief CodeGen info about outer OpenMP region.
300   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
301   CGOpenMPRegionInfo *OuterRegionInfo;
302 };
303 
304 /// \brief API for captured statement code generation in OpenMP target
305 /// constructs. For this captures, implicit parameters are used instead of the
306 /// captured fields. The name of the target region has to be unique in a given
307 /// application so it is provided by the client, because only the client has
308 /// the information to generate that.
309 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
310 public:
311   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
312                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
313       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
314                            /*HasCancel=*/false),
315         HelperName(HelperName) {}
316 
317   /// \brief This is unused for target regions because each starts executing
318   /// with a single thread.
319   const VarDecl *getThreadIDVariable() const override { return nullptr; }
320 
321   /// \brief Get the name of the capture helper.
322   StringRef getHelperName() const override { return HelperName; }
323 
324   static bool classof(const CGCapturedStmtInfo *Info) {
325     return CGOpenMPRegionInfo::classof(Info) &&
326            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
327   }
328 
329 private:
330   StringRef HelperName;
331 };
332 
333 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
334   llvm_unreachable("No codegen for expressions");
335 }
336 /// \brief API for generation of expressions captured in a innermost OpenMP
337 /// region.
338 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
339 public:
340   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
341       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
342                                   OMPD_unknown,
343                                   /*HasCancel=*/false),
344         PrivScope(CGF) {
345     // Make sure the globals captured in the provided statement are local by
346     // using the privatization logic. We assume the same variable is not
347     // captured more than once.
348     for (const auto &C : CS.captures()) {
349       if (!C.capturesVariable() && !C.capturesVariableByCopy())
350         continue;
351 
352       const VarDecl *VD = C.getCapturedVar();
353       if (VD->isLocalVarDeclOrParm())
354         continue;
355 
356       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
357                       /*RefersToEnclosingVariableOrCapture=*/false,
358                       VD->getType().getNonReferenceType(), VK_LValue,
359                       C.getLocation());
360       PrivScope.addPrivate(
361           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
362     }
363     (void)PrivScope.Privatize();
364   }
365 
366   /// \brief Lookup the captured field decl for a variable.
367   const FieldDecl *lookup(const VarDecl *VD) const override {
368     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
369       return FD;
370     return nullptr;
371   }
372 
373   /// \brief Emit the captured statement body.
374   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
375     llvm_unreachable("No body for expressions");
376   }
377 
378   /// \brief Get a variable or parameter for storing global thread id
379   /// inside OpenMP construct.
380   const VarDecl *getThreadIDVariable() const override {
381     llvm_unreachable("No thread id for expressions");
382   }
383 
384   /// \brief Get the name of the capture helper.
385   StringRef getHelperName() const override {
386     llvm_unreachable("No helper name for expressions");
387   }
388 
389   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
390 
391 private:
392   /// Private scope to capture global variables.
393   CodeGenFunction::OMPPrivateScope PrivScope;
394 };
395 
396 /// \brief RAII for emitting code of OpenMP constructs.
397 class InlinedOpenMPRegionRAII {
398   CodeGenFunction &CGF;
399   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
400   FieldDecl *LambdaThisCaptureField = nullptr;
401   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
402 
403 public:
404   /// \brief Constructs region for combined constructs.
405   /// \param CodeGen Code generation sequence for combined directives. Includes
406   /// a list of functions used for code generation of implicitly inlined
407   /// regions.
408   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
409                           OpenMPDirectiveKind Kind, bool HasCancel)
410       : CGF(CGF) {
411     // Start emission for the construct.
412     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
413         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
414     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
415     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
416     CGF.LambdaThisCaptureField = nullptr;
417     BlockInfo = CGF.BlockInfo;
418     CGF.BlockInfo = nullptr;
419   }
420 
421   ~InlinedOpenMPRegionRAII() {
422     // Restore original CapturedStmtInfo only if we're done with code emission.
423     auto *OldCSI =
424         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
425     delete CGF.CapturedStmtInfo;
426     CGF.CapturedStmtInfo = OldCSI;
427     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
429     CGF.BlockInfo = BlockInfo;
430   }
431 };
432 
433 /// \brief Values for bit flags used in the ident_t to describe the fields.
434 /// All enumeric elements are named and described in accordance with the code
435 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
436 enum OpenMPLocationFlags : unsigned {
437   /// \brief Use trampoline for internal microtask.
438   OMP_IDENT_IMD = 0x01,
439   /// \brief Use c-style ident structure.
440   OMP_IDENT_KMPC = 0x02,
441   /// \brief Atomic reduction option for kmpc_reduce.
442   OMP_ATOMIC_REDUCE = 0x10,
443   /// \brief Explicit 'barrier' directive.
444   OMP_IDENT_BARRIER_EXPL = 0x20,
445   /// \brief Implicit barrier in code.
446   OMP_IDENT_BARRIER_IMPL = 0x40,
447   /// \brief Implicit barrier in 'for' directive.
448   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
449   /// \brief Implicit barrier in 'sections' directive.
450   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
451   /// \brief Implicit barrier in 'single' directive.
452   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
453   /// Call of __kmp_for_static_init for static loop.
454   OMP_IDENT_WORK_LOOP = 0x200,
455   /// Call of __kmp_for_static_init for sections.
456   OMP_IDENT_WORK_SECTIONS = 0x400,
457   /// Call of __kmp_for_static_init for distribute.
458   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
459   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
460 };
461 
/// Field indices of the ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
502 
/// Schedule types for 'omp for' loops (these enumerators are taken from the
/// enum sched_type in kmp.h).
enum OpenMPSchedType {
  // Default (unordered) versions.

  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.

  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.

  /// Set if the monotonic schedule modifier was present (bit 29).
  OMP_sch_modifier_monotonic = 0x20000000,
  /// Set if the nonmonotonic schedule modifier was present (bit 30).
  OMP_sch_modifier_nonmonotonic = 0x40000000,
};
534 
/// Identifiers for the runtime library entry points named in the comment on
/// each enumerator. Values are implicit and sequential, so the order of the
/// enumerators must not change.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //

  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
718 
719 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
720 /// region.
721 class CleanupTy final : public EHScopeStack::Cleanup {
722   PrePostActionTy *Action;
723 
724 public:
725   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
726   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
727     if (!CGF.HaveInsertPoint())
728       return;
729     Action->Exit(CGF);
730   }
731 };
732 
733 } // anonymous namespace
734 
735 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
736   CodeGenFunction::RunCleanupsScope Scope(CGF);
737   if (PrePostAction) {
738     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
739     Callback(CodeGen, CGF, *PrePostAction);
740   } else {
741     PrePostActionTy Action;
742     Callback(CodeGen, CGF, Action);
743   }
744 }
745 
746 /// Check if the combiner is a call to UDR combiner and if it is so return the
747 /// UDR decl used for reduction.
748 static const OMPDeclareReductionDecl *
749 getReductionInit(const Expr *ReductionOp) {
750   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
751     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
752       if (const auto *DRE =
753               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
754         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
755           return DRD;
756   return nullptr;
757 }
758 
759 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
760                                              const OMPDeclareReductionDecl *DRD,
761                                              const Expr *InitOp,
762                                              Address Private, Address Original,
763                                              QualType Ty) {
764   if (DRD->getInitializer()) {
765     std::pair<llvm::Function *, llvm::Function *> Reduction =
766         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
767     const auto *CE = cast<CallExpr>(InitOp);
768     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
769     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
770     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
771     const auto *LHSDRE =
772         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
773     const auto *RHSDRE =
774         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
775     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
776     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
777                             [=]() { return Private; });
778     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
779                             [=]() { return Original; });
780     (void)PrivateScope.Privatize();
781     RValue Func = RValue::get(Reduction.second);
782     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
783     CGF.EmitIgnoredExpr(InitOp);
784   } else {
785     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
786     auto *GV = new llvm::GlobalVariable(
787         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
788         llvm::GlobalValue::PrivateLinkage, Init, ".init");
789     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
790     RValue InitRVal;
791     switch (CGF.getEvaluationKind(Ty)) {
792     case TEK_Scalar:
793       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
794       break;
795     case TEK_Complex:
796       InitRVal =
797           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
798       break;
799     case TEK_Aggregate:
800       InitRVal = RValue::getAggregate(LV.getAddress());
801       break;
802     }
803     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
804     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
805     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
806                          /*IsInitializer=*/false);
807   }
808 }
809 
/// \brief Emit initialization of arrays of complex types.
/// Emits a loop that initializes the destination array one element at a time.
/// \param CGF Code generation state used to emit the loop.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is evaluated directly
/// into each element.
/// \param Init Initial expression of array.
/// \param DRD Declare reduction declaration, or nullptr. The source array is
/// only walked when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // Cast from pointer to array type to pointer to single element.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source (only with DRD) and destination
  // elements across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): this GEP reuses the "omp.arraycpy.dest.element" value name
    // although it advances the source pointer - confirm whether intended.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
898 
899 static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
900 isDeclareTargetDeclaration(const ValueDecl *VD) {
901   if (const auto *MD = dyn_cast<CXXMethodDecl>(VD))
902     if (!MD->isStatic())
903       return llvm::None;
904   for (const Decl *D : VD->redecls()) {
905     if (!D->hasAttrs())
906       continue;
907     if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
908       return Attr->getMapType();
909   }
910   return llvm::None;
911 }
912 
913 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
914   return CGF.EmitOMPSharedLValue(E);
915 }
916 
917 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
918                                             const Expr *E) {
919   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
920     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
921   return LValue();
922 }
923 
924 void ReductionCodeGen::emitAggregateInitialization(
925     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
926     const OMPDeclareReductionDecl *DRD) {
927   // Emit VarDecl with copy init for arrays.
928   // Get the address of the original variable captured in current
929   // captured region.
930   const auto *PrivateVD =
931       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
932   bool EmitDeclareReductionInit =
933       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
934   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
935                        EmitDeclareReductionInit,
936                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
937                                                 : PrivateVD->getInit(),
938                        DRD, SharedLVal.getAddress());
939 }
940 
941 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
942                                    ArrayRef<const Expr *> Privates,
943                                    ArrayRef<const Expr *> ReductionOps) {
944   ClausesData.reserve(Shareds.size());
945   SharedAddresses.reserve(Shareds.size());
946   Sizes.reserve(Shareds.size());
947   BaseDecls.reserve(Shareds.size());
948   auto IPriv = Privates.begin();
949   auto IRed = ReductionOps.begin();
950   for (const Expr *Ref : Shareds) {
951     ClausesData.emplace_back(Ref, *IPriv, *IRed);
952     std::advance(IPriv, 1);
953     std::advance(IRed, 1);
954   }
955 }
956 
957 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
958   assert(SharedAddresses.size() == N &&
959          "Number of generated lvalues must be exactly N.");
960   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
961   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
962   SharedAddresses.emplace_back(First, Second);
963 }
964 
965 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
966   const auto *PrivateVD =
967       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
968   QualType PrivateType = PrivateVD->getType();
969   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
970   if (!PrivateType->isVariablyModifiedType()) {
971     Sizes.emplace_back(
972         CGF.getTypeSize(
973             SharedAddresses[N].first.getType().getNonReferenceType()),
974         nullptr);
975     return;
976   }
977   llvm::Value *Size;
978   llvm::Value *SizeInChars;
979   auto *ElemType =
980       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
981           ->getElementType();
982   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
983   if (AsArraySection) {
984     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
985                                      SharedAddresses[N].first.getPointer());
986     Size = CGF.Builder.CreateNUWAdd(
987         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
988     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
989   } else {
990     SizeInChars = CGF.getTypeSize(
991         SharedAddresses[N].first.getType().getNonReferenceType());
992     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
993   }
994   Sizes.emplace_back(SizeInChars, Size);
995   CodeGenFunction::OpaqueValueMapping OpaqueMap(
996       CGF,
997       cast<OpaqueValueExpr>(
998           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
999       RValue::get(Size));
1000   CGF.EmitVariablyModifiedType(PrivateType);
1001 }
1002 
1003 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1004                                          llvm::Value *Size) {
1005   const auto *PrivateVD =
1006       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1007   QualType PrivateType = PrivateVD->getType();
1008   if (!PrivateType->isVariablyModifiedType()) {
1009     assert(!Size && !Sizes[N].second &&
1010            "Size should be nullptr for non-variably modified reduction "
1011            "items.");
1012     return;
1013   }
1014   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1015       CGF,
1016       cast<OpaqueValueExpr>(
1017           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1018       RValue::get(Size));
1019   CGF.EmitVariablyModifiedType(PrivateType);
1020 }
1021 
1022 void ReductionCodeGen::emitInitialization(
1023     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1024     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1025   assert(SharedAddresses.size() > N && "No variable was generated");
1026   const auto *PrivateVD =
1027       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1028   const OMPDeclareReductionDecl *DRD =
1029       getReductionInit(ClausesData[N].ReductionOp);
1030   QualType PrivateType = PrivateVD->getType();
1031   PrivateAddr = CGF.Builder.CreateElementBitCast(
1032       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1033   QualType SharedType = SharedAddresses[N].first.getType();
1034   SharedLVal = CGF.MakeAddrLValue(
1035       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1036                                        CGF.ConvertTypeForMem(SharedType)),
1037       SharedType, SharedAddresses[N].first.getBaseInfo(),
1038       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1039   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1040     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1041   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1042     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1043                                      PrivateAddr, SharedLVal.getAddress(),
1044                                      SharedLVal.getType());
1045   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1046              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1047     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1048                          PrivateVD->getType().getQualifiers(),
1049                          /*IsInitializer=*/false);
1050   }
1051 }
1052 
1053 bool ReductionCodeGen::needCleanups(unsigned N) {
1054   const auto *PrivateVD =
1055       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1056   QualType PrivateType = PrivateVD->getType();
1057   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1058   return DTorKind != QualType::DK_none;
1059 }
1060 
1061 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1062                                     Address PrivateAddr) {
1063   const auto *PrivateVD =
1064       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1065   QualType PrivateType = PrivateVD->getType();
1066   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1067   if (needCleanups(N)) {
1068     PrivateAddr = CGF.Builder.CreateElementBitCast(
1069         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1070     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1071   }
1072 }
1073 
1074 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1075                           LValue BaseLV) {
1076   BaseTy = BaseTy.getNonReferenceType();
1077   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1078          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1079     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1080       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1081     } else {
1082       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1083       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1084     }
1085     BaseTy = BaseTy->getPointeeType();
1086   }
1087   return CGF.MakeAddrLValue(
1088       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1089                                        CGF.ConvertTypeForMem(ElTy)),
1090       BaseLV.getType(), BaseLV.getBaseInfo(),
1091       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1092 }
1093 
1094 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1095                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1096                           llvm::Value *Addr) {
1097   Address Tmp = Address::invalid();
1098   Address TopTmp = Address::invalid();
1099   Address MostTopTmp = Address::invalid();
1100   BaseTy = BaseTy.getNonReferenceType();
1101   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1102          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1103     Tmp = CGF.CreateMemTemp(BaseTy);
1104     if (TopTmp.isValid())
1105       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1106     else
1107       MostTopTmp = Tmp;
1108     TopTmp = Tmp;
1109     BaseTy = BaseTy->getPointeeType();
1110   }
1111   llvm::Type *Ty = BaseLVType;
1112   if (Tmp.isValid())
1113     Ty = Tmp.getElementType();
1114   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1115   if (Tmp.isValid()) {
1116     CGF.Builder.CreateStore(Addr, Tmp);
1117     return MostTopTmp;
1118   }
1119   return Address(Addr, BaseLVAlignment);
1120 }
1121 
1122 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1123   const VarDecl *OrigVD = nullptr;
1124   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1125     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1126     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1127       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1128     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1129       Base = TempASE->getBase()->IgnoreParenImpCasts();
1130     DE = cast<DeclRefExpr>(Base);
1131     OrigVD = cast<VarDecl>(DE->getDecl());
1132   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1133     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1134     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1135       Base = TempASE->getBase()->IgnoreParenImpCasts();
1136     DE = cast<DeclRefExpr>(Base);
1137     OrigVD = cast<VarDecl>(DE->getDecl());
1138   }
1139   return OrigVD;
1140 }
1141 
1142 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1143                                                Address PrivateAddr) {
1144   const DeclRefExpr *DE;
1145   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1146     BaseDecls.emplace_back(OrigVD);
1147     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1148     LValue BaseLValue =
1149         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1150                     OriginalBaseLValue);
1151     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1152         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1153     llvm::Value *PrivatePointer =
1154         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1155             PrivateAddr.getPointer(),
1156             SharedAddresses[N].first.getAddress().getType());
1157     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1158     return castToBase(CGF, OrigVD->getType(),
1159                       SharedAddresses[N].first.getType(),
1160                       OriginalBaseLValue.getAddress().getType(),
1161                       OriginalBaseLValue.getAlignment(), Ptr);
1162   }
1163   BaseDecls.emplace_back(
1164       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1165   return PrivateAddr;
1166 }
1167 
1168 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1169   const OMPDeclareReductionDecl *DRD =
1170       getReductionInit(ClausesData[N].ReductionOp);
1171   return DRD && DRD->getInitializer();
1172 }
1173 
1174 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1175   return CGF.EmitLoadOfPointerLValue(
1176       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1177       getThreadIDVariable()->getType()->castAs<PointerType>());
1178 }
1179 
1180 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1181   if (!CGF.HaveInsertPoint())
1182     return;
1183   // 1.2.2 OpenMP Language Terminology
1184   // Structured block - An executable statement with a single entry at the
1185   // top and a single exit at the bottom.
1186   // The point of exit cannot be a branch out of the structured block.
1187   // longjmp() and throw() must not violate the entry/exit criteria.
1188   CGF.EHStack.pushTerminate();
1189   CodeGen(CGF);
1190   CGF.EHStack.popTerminate();
1191 }
1192 
1193 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1194     CodeGenFunction &CGF) {
1195   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1196                             getThreadIDVariable()->getType(),
1197                             AlignmentSource::Decl);
1198 }
1199 
1200 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1201                                        QualType FieldTy) {
1202   auto *Field = FieldDecl::Create(
1203       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1204       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1205       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1206   Field->setAccess(AS_public);
1207   DC->addDecl(Field);
1208   return Field;
1209 }
1210 
1211 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1212     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
1213   ASTContext &C = CGM.getContext();
1214   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1215   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1216   RD->startDefinition();
1217   // reserved_1
1218   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1219   // flags
1220   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1221   // reserved_2
1222   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1223   // reserved_3
1224   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1225   // psource
1226   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1227   RD->completeDefinition();
1228   IdentQTy = C.getRecordType(RD);
1229   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1230   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1231 
1232   loadOffloadInfoMetadata();
1233 }
1234 
/// Drop all entries from the InternalVars container.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}
1238 
/// Emit the helper function for a user-defined reduction combiner or
/// initializer.
/// \param CGM Module the function is emitted into.
/// \param Ty Type of the reduction item; both parameters are restrict-qualified
/// pointers to it.
/// \param CombinerInitializer Expression emitted as the function body; may be
/// null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the ".omp_combiner." name; otherwise the function
/// is named ".omp_initializer." and a non-trivial initializer of \p Out is
/// emitted before \p CombinerInitializer.
/// \returns The emitted internal-linkage, always-inline function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (the 'out' parameter is pushed first, the 'in' parameter second).
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Replace any noinline/optnone attributes with alwaysinline.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, first emit the non-trivial init expression of the 'Out'
  // variable into its (remapped) storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1292 
1293 void CGOpenMPRuntime::emitUserDefinedReduction(
1294     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1295   if (UDRMap.count(D) > 0)
1296     return;
1297   ASTContext &C = CGM.getContext();
1298   if (!In || !Out) {
1299     In = &C.Idents.get("omp_in");
1300     Out = &C.Idents.get("omp_out");
1301   }
1302   llvm::Function *Combiner = emitCombinerOrInitializer(
1303       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1304       cast<VarDecl>(D->lookup(Out).front()),
1305       /*IsCombiner=*/true);
1306   llvm::Function *Initializer = nullptr;
1307   if (const Expr *Init = D->getInitializer()) {
1308     if (!Priv || !Orig) {
1309       Priv = &C.Idents.get("omp_priv");
1310       Orig = &C.Idents.get("omp_orig");
1311     }
1312     Initializer = emitCombinerOrInitializer(
1313         CGM, D->getType(),
1314         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1315                                                                      : nullptr,
1316         cast<VarDecl>(D->lookup(Orig).front()),
1317         cast<VarDecl>(D->lookup(Priv).front()),
1318         /*IsCombiner=*/false);
1319   }
1320   UDRMap.try_emplace(D, Combiner, Initializer);
1321   if (CGF) {
1322     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1323     Decls.second.push_back(D);
1324   }
1325 }
1326 
1327 std::pair<llvm::Function *, llvm::Function *>
1328 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1329   auto I = UDRMap.find(D);
1330   if (I != UDRMap.end())
1331     return I->second;
1332   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1333   return UDRMap.lookup(D);
1334 }
1335 
1336 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
1337     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1338     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1339     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1340   assert(ThreadIDVar->getType()->isPointerType() &&
1341          "thread id variable must be of type kmp_int32 *");
1342   CodeGenFunction CGF(CGM, true);
1343   bool HasCancel = false;
1344   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1345     HasCancel = OPD->hasCancel();
1346   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1347     HasCancel = OPSD->hasCancel();
1348   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1349     HasCancel = OPFD->hasCancel();
1350   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1351     HasCancel = OPFD->hasCancel();
1352   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1353     HasCancel = OPFD->hasCancel();
1354   else if (const auto *OPFD =
1355                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1356     HasCancel = OPFD->hasCancel();
1357   else if (const auto *OPFD =
1358                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1359     HasCancel = OPFD->hasCancel();
1360   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1361                                     HasCancel, OutlinedHelperName);
1362   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1363   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1364 }
1365 
1366 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1367     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1368     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1369   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1370   return emitParallelOrTeamsOutlinedFunction(
1371       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1372 }
1373 
1374 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1375     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1376     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1377   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1378   return emitParallelOrTeamsOutlinedFunction(
1379       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1380 }
1381 
1382 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
1383     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1384     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1385     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1386     bool Tied, unsigned &NumberOfParts) {
1387   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1388                                               PrePostActionTy &) {
1389     llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart());
1390     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1391     llvm::Value *TaskArgs[] = {
1392         UpLoc, ThreadID,
1393         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1394                                     TaskTVar->getType()->castAs<PointerType>())
1395             .getPointer()};
1396     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1397   };
1398   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1399                                                             UntiedCodeGen);
1400   CodeGen.setAction(Action);
1401   assert(!ThreadIDVar->getType()->isPointerType() &&
1402          "thread id variable must be of type kmp_int32 for tasks");
1403   const OpenMPDirectiveKind Region =
1404       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1405                                                       : OMPD_task;
1406   const CapturedStmt *CS = D.getCapturedStmt(Region);
1407   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1408   CodeGenFunction CGF(CGM, true);
1409   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1410                                         InnermostKind,
1411                                         TD ? TD->hasCancel() : false, Action);
1412   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1413   llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
1414   if (!Tied)
1415     NumberOfParts = Action.getNumberOfParts();
1416   return Res;
1417 }
1418 
1419 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1420                              const RecordDecl *RD, const CGRecordLayout &RL,
1421                              ArrayRef<llvm::Constant *> Data) {
1422   llvm::StructType *StructTy = RL.getLLVMType();
1423   unsigned PrevIdx = 0;
1424   ConstantInitBuilder CIBuilder(CGM);
1425   auto DI = Data.begin();
1426   for (const FieldDecl *FD : RD->fields()) {
1427     unsigned Idx = RL.getLLVMFieldNo(FD);
1428     // Fill the alignment.
1429     for (unsigned I = PrevIdx; I < Idx; ++I)
1430       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1431     PrevIdx = Idx + 1;
1432     Fields.add(*DI);
1433     ++DI;
1434   }
1435 }
1436 
1437 template <class... As>
1438 static llvm::GlobalVariable *
1439 createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty,
1440                            ArrayRef<llvm::Constant *> Data, const Twine &Name,
1441                            As &&... Args) {
1442   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1443   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1444   ConstantInitBuilder CIBuilder(CGM);
1445   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1446   buildStructValue(Fields, CGM, RD, RL, Data);
1447   return Fields.finishAndCreateGlobal(
1448       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty),
1449       /*isConstant=*/true, std::forward<As>(Args)...);
1450 }
1451 
1452 template <typename T>
1453 void createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1454                                               ArrayRef<llvm::Constant *> Data,
1455                                               T &Parent) {
1456   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1457   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1458   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1459   buildStructValue(Fields, CGM, RD, RL, Data);
1460   Fields.finishAndAddTo(Parent);
1461 }
1462 
1463 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1464   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1465   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1466   if (!Entry) {
1467     if (!DefaultOpenMPPSource) {
1468       // Initialize default location for psource field of ident_t structure of
1469       // all ident_t objects. Format is ";file;function;line;column;;".
1470       // Taken from
1471       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1472       DefaultOpenMPPSource =
1473           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1474       DefaultOpenMPPSource =
1475           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1476     }
1477 
1478     llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1479                               llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1480                               llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1481                               llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1482                               DefaultOpenMPPSource};
1483     llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct(
1484         CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage);
1485     DefaultOpenMPLocation->setUnnamedAddr(
1486         llvm::GlobalValue::UnnamedAddr::Global);
1487 
1488     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1489   }
1490   return Address(Entry, Align);
1491 }
1492 
1493 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1494                                                  SourceLocation Loc,
1495                                                  unsigned Flags) {
1496   Flags |= OMP_IDENT_KMPC;
1497   // If no debug info is generated - return global default location.
1498   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1499       Loc.isInvalid())
1500     return getOrCreateDefaultLocation(Flags).getPointer();
1501 
1502   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1503 
1504   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1505   Address LocValue = Address::invalid();
1506   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1507   if (I != OpenMPLocThreadIDMap.end())
1508     LocValue = Address(I->second.DebugLoc, Align);
1509 
1510   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1511   // GetOpenMPThreadID was called before this routine.
1512   if (!LocValue.isValid()) {
1513     // Generate "ident_t .kmpc_loc.addr;"
1514     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1515     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1516     Elem.second.DebugLoc = AI.getPointer();
1517     LocValue = AI;
1518 
1519     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1520     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1521     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1522                              CGF.getTypeSize(IdentQTy));
1523   }
1524 
1525   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1526   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1527   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1528   LValue PSource =
1529       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1530 
1531   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1532   if (OMPDebugLoc == nullptr) {
1533     SmallString<128> Buffer2;
1534     llvm::raw_svector_ostream OS2(Buffer2);
1535     // Build debug location
1536     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1537     OS2 << ";" << PLoc.getFilename() << ";";
1538     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1539       OS2 << FD->getQualifiedNameAsString();
1540     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1541     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1542     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1543   }
1544   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1545   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1546 
1547   // Our callers always pass this to a runtime function, so for
1548   // convenience, go ahead and return a naked pointer.
1549   return LocValue.getPointer();
1550 }
1551 
1552 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1553                                           SourceLocation Loc) {
1554   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1555 
1556   llvm::Value *ThreadID = nullptr;
1557   // Check whether we've already cached a load of the thread id in this
1558   // function.
1559   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1560   if (I != OpenMPLocThreadIDMap.end()) {
1561     ThreadID = I->second.ThreadID;
1562     if (ThreadID != nullptr)
1563       return ThreadID;
1564   }
1565   // If exceptions are enabled, do not use parameter to avoid possible crash.
1566   if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1567       !CGF.getLangOpts().CXXExceptions ||
1568       CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1569     if (auto *OMPRegionInfo =
1570             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1571       if (OMPRegionInfo->getThreadIDVariable()) {
1572         // Check if this an outlined function with thread id passed as argument.
1573         LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1574         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1575         // If value loaded in entry block, cache it and use it everywhere in
1576         // function.
1577         if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1578           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1579           Elem.second.ThreadID = ThreadID;
1580         }
1581         return ThreadID;
1582       }
1583     }
1584   }
1585 
1586   // This is not an outlined function region - need to call __kmpc_int32
1587   // kmpc_global_thread_num(ident_t *loc).
1588   // Generate thread id value and cache this value for use across the
1589   // function.
1590   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1591   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1592   llvm::CallInst *Call = CGF.Builder.CreateCall(
1593       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1594       emitUpdateLocation(CGF, Loc));
1595   Call->setCallingConv(CGF.getRuntimeCC());
1596   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1597   Elem.second.ThreadID = Call;
1598   return Call;
1599 }
1600 
1601 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1602   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1603   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1604     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1605   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1606     for(auto *D : FunctionUDRMap[CGF.CurFn])
1607       UDRMap.erase(D);
1608     FunctionUDRMap.erase(CGF.CurFn);
1609   }
1610 }
1611 
1612 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1613   return IdentTy->getPointerTo();
1614 }
1615 
1616 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1617   if (!Kmpc_MicroTy) {
1618     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1619     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1620                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1621     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1622   }
1623   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1624 }
1625 
1626 llvm::Constant *
1627 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1628   llvm::Constant *RTLFn = nullptr;
1629   switch (static_cast<OpenMPRTLFunction>(Function)) {
1630   case OMPRTL__kmpc_fork_call: {
1631     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1632     // microtask, ...);
1633     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1634                                 getKmpc_MicroPointerTy()};
1635     auto *FnTy =
1636         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1637     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1638     break;
1639   }
1640   case OMPRTL__kmpc_global_thread_num: {
1641     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1642     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1643     auto *FnTy =
1644         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1645     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1646     break;
1647   }
1648   case OMPRTL__kmpc_threadprivate_cached: {
1649     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1650     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1651     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1652                                 CGM.VoidPtrTy, CGM.SizeTy,
1653                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1654     auto *FnTy =
1655         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1656     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1657     break;
1658   }
1659   case OMPRTL__kmpc_critical: {
1660     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1661     // kmp_critical_name *crit);
1662     llvm::Type *TypeParams[] = {
1663         getIdentTyPointerTy(), CGM.Int32Ty,
1664         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1665     auto *FnTy =
1666         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1667     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1668     break;
1669   }
1670   case OMPRTL__kmpc_critical_with_hint: {
1671     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1672     // kmp_critical_name *crit, uintptr_t hint);
1673     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1674                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1675                                 CGM.IntPtrTy};
1676     auto *FnTy =
1677         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1678     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1679     break;
1680   }
1681   case OMPRTL__kmpc_threadprivate_register: {
1682     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1683     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1684     // typedef void *(*kmpc_ctor)(void *);
1685     auto *KmpcCtorTy =
1686         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1687                                 /*isVarArg*/ false)->getPointerTo();
1688     // typedef void *(*kmpc_cctor)(void *, void *);
1689     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1690     auto *KmpcCopyCtorTy =
1691         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1692                                 /*isVarArg*/ false)
1693             ->getPointerTo();
1694     // typedef void (*kmpc_dtor)(void *);
1695     auto *KmpcDtorTy =
1696         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1697             ->getPointerTo();
1698     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1699                               KmpcCopyCtorTy, KmpcDtorTy};
1700     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1701                                         /*isVarArg*/ false);
1702     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1703     break;
1704   }
1705   case OMPRTL__kmpc_end_critical: {
1706     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1707     // kmp_critical_name *crit);
1708     llvm::Type *TypeParams[] = {
1709         getIdentTyPointerTy(), CGM.Int32Ty,
1710         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1711     auto *FnTy =
1712         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1713     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1714     break;
1715   }
1716   case OMPRTL__kmpc_cancel_barrier: {
1717     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1718     // global_tid);
1719     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1720     auto *FnTy =
1721         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1722     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1723     break;
1724   }
1725   case OMPRTL__kmpc_barrier: {
1726     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1728     auto *FnTy =
1729         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1730     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1731     break;
1732   }
1733   case OMPRTL__kmpc_for_static_fini: {
1734     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1735     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1736     auto *FnTy =
1737         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1738     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1739     break;
1740   }
1741   case OMPRTL__kmpc_push_num_threads: {
1742     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1743     // kmp_int32 num_threads)
1744     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1745                                 CGM.Int32Ty};
1746     auto *FnTy =
1747         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1748     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1749     break;
1750   }
1751   case OMPRTL__kmpc_serialized_parallel: {
1752     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1753     // global_tid);
1754     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1755     auto *FnTy =
1756         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1757     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1758     break;
1759   }
1760   case OMPRTL__kmpc_end_serialized_parallel: {
1761     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1762     // global_tid);
1763     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1764     auto *FnTy =
1765         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1766     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1767     break;
1768   }
1769   case OMPRTL__kmpc_flush: {
1770     // Build void __kmpc_flush(ident_t *loc);
1771     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1772     auto *FnTy =
1773         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1774     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1775     break;
1776   }
1777   case OMPRTL__kmpc_master: {
1778     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1779     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1780     auto *FnTy =
1781         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1782     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1783     break;
1784   }
1785   case OMPRTL__kmpc_end_master: {
1786     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1787     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1788     auto *FnTy =
1789         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1790     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1791     break;
1792   }
1793   case OMPRTL__kmpc_omp_taskyield: {
1794     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1795     // int end_part);
1796     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1797     auto *FnTy =
1798         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1799     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1800     break;
1801   }
1802   case OMPRTL__kmpc_single: {
1803     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1804     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1805     auto *FnTy =
1806         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1808     break;
1809   }
1810   case OMPRTL__kmpc_end_single: {
1811     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1812     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1813     auto *FnTy =
1814         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1815     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1816     break;
1817   }
1818   case OMPRTL__kmpc_omp_task_alloc: {
1819     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1820     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1821     // kmp_routine_entry_t *task_entry);
1822     assert(KmpRoutineEntryPtrTy != nullptr &&
1823            "Type kmp_routine_entry_t must be created.");
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1825                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1826     // Return void * and then cast to particular kmp_task_t type.
1827     auto *FnTy =
1828         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1829     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1830     break;
1831   }
1832   case OMPRTL__kmpc_omp_task: {
1833     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1834     // *new_task);
1835     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1836                                 CGM.VoidPtrTy};
1837     auto *FnTy =
1838         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1839     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1840     break;
1841   }
1842   case OMPRTL__kmpc_copyprivate: {
1843     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1844     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1845     // kmp_int32 didit);
1846     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1847     auto *CpyFnTy =
1848         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1849     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1850                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1851                                 CGM.Int32Ty};
1852     auto *FnTy =
1853         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1854     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1855     break;
1856   }
1857   case OMPRTL__kmpc_reduce: {
1858     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1859     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1860     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1861     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1862     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1863                                                /*isVarArg=*/false);
1864     llvm::Type *TypeParams[] = {
1865         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1866         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1867         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1868     auto *FnTy =
1869         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1870     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1871     break;
1872   }
1873   case OMPRTL__kmpc_reduce_nowait: {
1874     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1875     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1876     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1877     // *lck);
1878     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1879     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1880                                                /*isVarArg=*/false);
1881     llvm::Type *TypeParams[] = {
1882         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1883         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1884         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1885     auto *FnTy =
1886         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1887     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1888     break;
1889   }
1890   case OMPRTL__kmpc_end_reduce: {
1891     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1892     // kmp_critical_name *lck);
1893     llvm::Type *TypeParams[] = {
1894         getIdentTyPointerTy(), CGM.Int32Ty,
1895         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1896     auto *FnTy =
1897         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1898     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1899     break;
1900   }
1901   case OMPRTL__kmpc_end_reduce_nowait: {
1902     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1903     // kmp_critical_name *lck);
1904     llvm::Type *TypeParams[] = {
1905         getIdentTyPointerTy(), CGM.Int32Ty,
1906         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1907     auto *FnTy =
1908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909     RTLFn =
1910         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1911     break;
1912   }
1913   case OMPRTL__kmpc_omp_task_begin_if0: {
1914     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1915     // *new_task);
1916     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1917                                 CGM.VoidPtrTy};
1918     auto *FnTy =
1919         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1920     RTLFn =
1921         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1922     break;
1923   }
1924   case OMPRTL__kmpc_omp_task_complete_if0: {
1925     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1926     // *new_task);
1927     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1928                                 CGM.VoidPtrTy};
1929     auto *FnTy =
1930         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1931     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1932                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1933     break;
1934   }
1935   case OMPRTL__kmpc_ordered: {
1936     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1937     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1938     auto *FnTy =
1939         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1940     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1941     break;
1942   }
1943   case OMPRTL__kmpc_end_ordered: {
1944     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1945     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1946     auto *FnTy =
1947         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1948     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1949     break;
1950   }
1951   case OMPRTL__kmpc_omp_taskwait: {
1952     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1953     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1954     auto *FnTy =
1955         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1956     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1957     break;
1958   }
1959   case OMPRTL__kmpc_taskgroup: {
1960     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1961     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1962     auto *FnTy =
1963         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1964     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1965     break;
1966   }
1967   case OMPRTL__kmpc_end_taskgroup: {
1968     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1969     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1970     auto *FnTy =
1971         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1972     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1973     break;
1974   }
1975   case OMPRTL__kmpc_push_proc_bind: {
1976     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1977     // int proc_bind)
1978     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1979     auto *FnTy =
1980         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1981     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1982     break;
1983   }
1984   case OMPRTL__kmpc_omp_task_with_deps: {
1985     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1986     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1987     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1988     llvm::Type *TypeParams[] = {
1989         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1990         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1991     auto *FnTy =
1992         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1993     RTLFn =
1994         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1995     break;
1996   }
1997   case OMPRTL__kmpc_omp_wait_deps: {
1998     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1999     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2000     // kmp_depend_info_t *noalias_dep_list);
2001     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2002                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2003                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2004     auto *FnTy =
2005         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2006     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2007     break;
2008   }
2009   case OMPRTL__kmpc_cancellationpoint: {
2010     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2011     // global_tid, kmp_int32 cncl_kind)
2012     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2013     auto *FnTy =
2014         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2015     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2016     break;
2017   }
2018   case OMPRTL__kmpc_cancel: {
2019     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2020     // kmp_int32 cncl_kind)
2021     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2022     auto *FnTy =
2023         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2024     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2025     break;
2026   }
2027   case OMPRTL__kmpc_push_num_teams: {
2028     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2029     // kmp_int32 num_teams, kmp_int32 num_threads)
2030     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2031         CGM.Int32Ty};
2032     auto *FnTy =
2033         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2034     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2035     break;
2036   }
2037   case OMPRTL__kmpc_fork_teams: {
2038     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2039     // microtask, ...);
2040     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2041                                 getKmpc_MicroPointerTy()};
2042     auto *FnTy =
2043         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2044     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2045     break;
2046   }
2047   case OMPRTL__kmpc_taskloop: {
2048     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2049     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2050     // sched, kmp_uint64 grainsize, void *task_dup);
2051     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2052                                 CGM.IntTy,
2053                                 CGM.VoidPtrTy,
2054                                 CGM.IntTy,
2055                                 CGM.Int64Ty->getPointerTo(),
2056                                 CGM.Int64Ty->getPointerTo(),
2057                                 CGM.Int64Ty,
2058                                 CGM.IntTy,
2059                                 CGM.IntTy,
2060                                 CGM.Int64Ty,
2061                                 CGM.VoidPtrTy};
2062     auto *FnTy =
2063         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2064     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2065     break;
2066   }
2067   case OMPRTL__kmpc_doacross_init: {
2068     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2069     // num_dims, struct kmp_dim *dims);
2070     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2071                                 CGM.Int32Ty,
2072                                 CGM.Int32Ty,
2073                                 CGM.VoidPtrTy};
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2076     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2077     break;
2078   }
2079   case OMPRTL__kmpc_doacross_fini: {
2080     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2081     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2085     break;
2086   }
2087   case OMPRTL__kmpc_doacross_post: {
2088     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2089     // *vec);
2090     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2091                                 CGM.Int64Ty->getPointerTo()};
2092     auto *FnTy =
2093         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2094     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2095     break;
2096   }
2097   case OMPRTL__kmpc_doacross_wait: {
2098     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2099     // *vec);
2100     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2101                                 CGM.Int64Ty->getPointerTo()};
2102     auto *FnTy =
2103         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2104     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2105     break;
2106   }
2107   case OMPRTL__kmpc_task_reduction_init: {
2108     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2109     // *data);
2110     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2111     auto *FnTy =
2112         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2113     RTLFn =
2114         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2115     break;
2116   }
2117   case OMPRTL__kmpc_task_reduction_get_th_data: {
2118     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2119     // *d);
2120     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2121     auto *FnTy =
2122         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2123     RTLFn = CGM.CreateRuntimeFunction(
2124         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2125     break;
2126   }
2127   case OMPRTL__tgt_target: {
2128     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2129     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2130     // *arg_types);
2131     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2132                                 CGM.VoidPtrTy,
2133                                 CGM.Int32Ty,
2134                                 CGM.VoidPtrPtrTy,
2135                                 CGM.VoidPtrPtrTy,
2136                                 CGM.SizeTy->getPointerTo(),
2137                                 CGM.Int64Ty->getPointerTo()};
2138     auto *FnTy =
2139         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2140     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2141     break;
2142   }
2143   case OMPRTL__tgt_target_nowait: {
2144     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2145     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2146     // int64_t *arg_types);
2147     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2148                                 CGM.VoidPtrTy,
2149                                 CGM.Int32Ty,
2150                                 CGM.VoidPtrPtrTy,
2151                                 CGM.VoidPtrPtrTy,
2152                                 CGM.SizeTy->getPointerTo(),
2153                                 CGM.Int64Ty->getPointerTo()};
2154     auto *FnTy =
2155         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2156     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2157     break;
2158   }
2159   case OMPRTL__tgt_target_teams: {
2160     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2161     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2162     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2163     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2164                                 CGM.VoidPtrTy,
2165                                 CGM.Int32Ty,
2166                                 CGM.VoidPtrPtrTy,
2167                                 CGM.VoidPtrPtrTy,
2168                                 CGM.SizeTy->getPointerTo(),
2169                                 CGM.Int64Ty->getPointerTo(),
2170                                 CGM.Int32Ty,
2171                                 CGM.Int32Ty};
2172     auto *FnTy =
2173         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2174     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2175     break;
2176   }
2177   case OMPRTL__tgt_target_teams_nowait: {
2178     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2179     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2180     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2181     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2182                                 CGM.VoidPtrTy,
2183                                 CGM.Int32Ty,
2184                                 CGM.VoidPtrPtrTy,
2185                                 CGM.VoidPtrPtrTy,
2186                                 CGM.SizeTy->getPointerTo(),
2187                                 CGM.Int64Ty->getPointerTo(),
2188                                 CGM.Int32Ty,
2189                                 CGM.Int32Ty};
2190     auto *FnTy =
2191         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2192     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2193     break;
2194   }
2195   case OMPRTL__tgt_register_lib: {
2196     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2197     QualType ParamTy =
2198         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2199     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2200     auto *FnTy =
2201         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2202     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2203     break;
2204   }
2205   case OMPRTL__tgt_unregister_lib: {
2206     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2207     QualType ParamTy =
2208         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2209     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2212     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2213     break;
2214   }
2215   case OMPRTL__tgt_target_data_begin: {
2216     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2217     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2218     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2219                                 CGM.Int32Ty,
2220                                 CGM.VoidPtrPtrTy,
2221                                 CGM.VoidPtrPtrTy,
2222                                 CGM.SizeTy->getPointerTo(),
2223                                 CGM.Int64Ty->getPointerTo()};
2224     auto *FnTy =
2225         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2226     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2227     break;
2228   }
2229   case OMPRTL__tgt_target_data_begin_nowait: {
2230     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2231     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2232     // *arg_types);
2233     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2234                                 CGM.Int32Ty,
2235                                 CGM.VoidPtrPtrTy,
2236                                 CGM.VoidPtrPtrTy,
2237                                 CGM.SizeTy->getPointerTo(),
2238                                 CGM.Int64Ty->getPointerTo()};
2239     auto *FnTy =
2240         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2241     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2242     break;
2243   }
2244   case OMPRTL__tgt_target_data_end: {
2245     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2246     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2247     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2248                                 CGM.Int32Ty,
2249                                 CGM.VoidPtrPtrTy,
2250                                 CGM.VoidPtrPtrTy,
2251                                 CGM.SizeTy->getPointerTo(),
2252                                 CGM.Int64Ty->getPointerTo()};
2253     auto *FnTy =
2254         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2255     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2256     break;
2257   }
2258   case OMPRTL__tgt_target_data_end_nowait: {
2259     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2260     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2261     // *arg_types);
2262     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2263                                 CGM.Int32Ty,
2264                                 CGM.VoidPtrPtrTy,
2265                                 CGM.VoidPtrPtrTy,
2266                                 CGM.SizeTy->getPointerTo(),
2267                                 CGM.Int64Ty->getPointerTo()};
2268     auto *FnTy =
2269         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2270     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2271     break;
2272   }
2273   case OMPRTL__tgt_target_data_update: {
2274     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2275     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2276     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2277                                 CGM.Int32Ty,
2278                                 CGM.VoidPtrPtrTy,
2279                                 CGM.VoidPtrPtrTy,
2280                                 CGM.SizeTy->getPointerTo(),
2281                                 CGM.Int64Ty->getPointerTo()};
2282     auto *FnTy =
2283         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2284     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2285     break;
2286   }
2287   case OMPRTL__tgt_target_data_update_nowait: {
2288     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2289     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2290     // *arg_types);
2291     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2292                                 CGM.Int32Ty,
2293                                 CGM.VoidPtrPtrTy,
2294                                 CGM.VoidPtrPtrTy,
2295                                 CGM.SizeTy->getPointerTo(),
2296                                 CGM.Int64Ty->getPointerTo()};
2297     auto *FnTy =
2298         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2299     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2300     break;
2301   }
2302   }
2303   assert(RTLFn && "Unable to find OpenMP runtime function");
2304   return RTLFn;
2305 }
2306 
2307 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2308                                                              bool IVSigned) {
2309   assert((IVSize == 32 || IVSize == 64) &&
2310          "IV size is not compatible with the omp runtime");
2311   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2312                                             : "__kmpc_for_static_init_4u")
2313                                 : (IVSigned ? "__kmpc_for_static_init_8"
2314                                             : "__kmpc_for_static_init_8u");
2315   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2316   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2317   llvm::Type *TypeParams[] = {
2318     getIdentTyPointerTy(),                     // loc
2319     CGM.Int32Ty,                               // tid
2320     CGM.Int32Ty,                               // schedtype
2321     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2322     PtrTy,                                     // p_lower
2323     PtrTy,                                     // p_upper
2324     PtrTy,                                     // p_stride
2325     ITy,                                       // incr
2326     ITy                                        // chunk
2327   };
2328   auto *FnTy =
2329       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2330   return CGM.CreateRuntimeFunction(FnTy, Name);
2331 }
2332 
2333 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2334                                                             bool IVSigned) {
2335   assert((IVSize == 32 || IVSize == 64) &&
2336          "IV size is not compatible with the omp runtime");
2337   StringRef Name =
2338       IVSize == 32
2339           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2340           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2341   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2342   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2343                                CGM.Int32Ty,           // tid
2344                                CGM.Int32Ty,           // schedtype
2345                                ITy,                   // lower
2346                                ITy,                   // upper
2347                                ITy,                   // stride
2348                                ITy                    // chunk
2349   };
2350   auto *FnTy =
2351       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2352   return CGM.CreateRuntimeFunction(FnTy, Name);
2353 }
2354 
2355 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2356                                                             bool IVSigned) {
2357   assert((IVSize == 32 || IVSize == 64) &&
2358          "IV size is not compatible with the omp runtime");
2359   StringRef Name =
2360       IVSize == 32
2361           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2362           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2363   llvm::Type *TypeParams[] = {
2364       getIdentTyPointerTy(), // loc
2365       CGM.Int32Ty,           // tid
2366   };
2367   auto *FnTy =
2368       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2369   return CGM.CreateRuntimeFunction(FnTy, Name);
2370 }
2371 
2372 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2373                                                             bool IVSigned) {
2374   assert((IVSize == 32 || IVSize == 64) &&
2375          "IV size is not compatible with the omp runtime");
2376   StringRef Name =
2377       IVSize == 32
2378           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2379           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2380   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2381   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2382   llvm::Type *TypeParams[] = {
2383     getIdentTyPointerTy(),                     // loc
2384     CGM.Int32Ty,                               // tid
2385     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2386     PtrTy,                                     // p_lower
2387     PtrTy,                                     // p_upper
2388     PtrTy                                      // p_stride
2389   };
2390   auto *FnTy =
2391       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2392   return CGM.CreateRuntimeFunction(FnTy, Name);
2393 }
2394 
2395 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2396   if (CGM.getLangOpts().OpenMPSimd)
2397     return Address::invalid();
2398   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2399       isDeclareTargetDeclaration(VD);
2400   if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2401     SmallString<64> PtrName;
2402     {
2403       llvm::raw_svector_ostream OS(PtrName);
2404       OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2405     }
2406     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2407     if (!Ptr) {
2408       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2409       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2410                                         PtrName);
2411       if (!CGM.getLangOpts().OpenMPIsDevice) {
2412         auto *GV = cast<llvm::GlobalVariable>(Ptr);
2413         GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2414         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2415       }
2416       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2417       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2418     }
2419     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2420   }
2421   return Address::invalid();
2422 }
2423 
2424 llvm::Constant *
2425 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2426   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2427          !CGM.getContext().getTargetInfo().isTLSSupported());
2428   // Lookup the entry, lazily creating it if necessary.
2429   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
2430                                      Twine(CGM.getMangledName(VD), ".cache."));
2431 }
2432 
2433 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2434                                                 const VarDecl *VD,
2435                                                 Address VDAddr,
2436                                                 SourceLocation Loc) {
2437   if (CGM.getLangOpts().OpenMPUseTLS &&
2438       CGM.getContext().getTargetInfo().isTLSSupported())
2439     return VDAddr;
2440 
2441   llvm::Type *VarTy = VDAddr.getElementType();
2442   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2443                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2444                                                        CGM.Int8PtrTy),
2445                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2446                          getOrCreateThreadPrivateCache(VD)};
2447   return Address(CGF.EmitRuntimeCall(
2448       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2449                  VDAddr.getAlignment());
2450 }
2451 
2452 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2453     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2454     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2455   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2456   // library.
2457   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2458   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2459                       OMPLoc);
2460   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2461   // to register constructor/destructor for variable.
2462   llvm::Value *Args[] = {
2463       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2464       Ctor, CopyCtor, Dtor};
2465   CGF.EmitRuntimeCall(
2466       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2467 }
2468 
2469 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2470     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2471     bool PerformInit, CodeGenFunction *CGF) {
2472   if (CGM.getLangOpts().OpenMPUseTLS &&
2473       CGM.getContext().getTargetInfo().isTLSSupported())
2474     return nullptr;
2475 
2476   VD = VD->getDefinition(CGM.getContext());
2477   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2478     ThreadPrivateWithDefinition.insert(VD);
2479     QualType ASTTy = VD->getType();
2480 
2481     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2482     const Expr *Init = VD->getAnyInitializer();
2483     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2484       // Generate function that re-emits the declaration's initializer into the
2485       // threadprivate copy of the variable VD
2486       CodeGenFunction CtorCGF(CGM);
2487       FunctionArgList Args;
2488       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2489                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2490                             ImplicitParamDecl::Other);
2491       Args.push_back(&Dst);
2492 
2493       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2494           CGM.getContext().VoidPtrTy, Args);
2495       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2496       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2497           FTy, ".__kmpc_global_ctor_.", FI, Loc);
2498       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2499                             Args, Loc, Loc);
2500       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2501           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2502           CGM.getContext().VoidPtrTy, Dst.getLocation());
2503       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2504       Arg = CtorCGF.Builder.CreateElementBitCast(
2505           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2506       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2507                                /*IsInitializer=*/true);
2508       ArgVal = CtorCGF.EmitLoadOfScalar(
2509           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2510           CGM.getContext().VoidPtrTy, Dst.getLocation());
2511       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2512       CtorCGF.FinishFunction();
2513       Ctor = Fn;
2514     }
2515     if (VD->getType().isDestructedType() != QualType::DK_none) {
2516       // Generate function that emits destructor call for the threadprivate copy
2517       // of the variable VD
2518       CodeGenFunction DtorCGF(CGM);
2519       FunctionArgList Args;
2520       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2521                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2522                             ImplicitParamDecl::Other);
2523       Args.push_back(&Dst);
2524 
2525       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2526           CGM.getContext().VoidTy, Args);
2527       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2528       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2529           FTy, ".__kmpc_global_dtor_.", FI, Loc);
2530       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2531       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2532                             Loc, Loc);
2533       // Create a scope with an artificial location for the body of this function.
2534       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2535       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2536           DtorCGF.GetAddrOfLocalVar(&Dst),
2537           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2538       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2539                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2540                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2541       DtorCGF.FinishFunction();
2542       Dtor = Fn;
2543     }
2544     // Do not emit init function if it is not required.
2545     if (!Ctor && !Dtor)
2546       return nullptr;
2547 
2548     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2549     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2550                                                /*isVarArg=*/false)
2551                            ->getPointerTo();
2552     // Copying constructor for the threadprivate variable.
2553     // Must be NULL - reserved by runtime, but currently it requires that this
2554     // parameter is always NULL. Otherwise it fires assertion.
2555     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2556     if (Ctor == nullptr) {
2557       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2558                                              /*isVarArg=*/false)
2559                          ->getPointerTo();
2560       Ctor = llvm::Constant::getNullValue(CtorTy);
2561     }
2562     if (Dtor == nullptr) {
2563       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2564                                              /*isVarArg=*/false)
2565                          ->getPointerTo();
2566       Dtor = llvm::Constant::getNullValue(DtorTy);
2567     }
2568     if (!CGF) {
2569       auto *InitFunctionTy =
2570           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2571       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2572           InitFunctionTy, ".__omp_threadprivate_init_.",
2573           CGM.getTypes().arrangeNullaryFunction());
2574       CodeGenFunction InitCGF(CGM);
2575       FunctionArgList ArgList;
2576       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2577                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2578                             Loc, Loc);
2579       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2580       InitCGF.FinishFunction();
2581       return InitFunction;
2582     }
2583     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2584   }
2585   return nullptr;
2586 }
2587 
2588 /// \brief Obtain information that uniquely identifies a target entry. This
2589 /// consists of the file and device IDs as well as line number associated with
2590 /// the relevant entry source location.
2591 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2592                                      unsigned &DeviceID, unsigned &FileID,
2593                                      unsigned &LineNum) {
2594   SourceManager &SM = C.getSourceManager();
2595 
2596   // The loc should be always valid and have a file ID (the user cannot use
2597   // #pragma directives in macros)
2598 
2599   assert(Loc.isValid() && "Source location is expected to be always valid.");
2600 
2601   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2602   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2603 
2604   llvm::sys::fs::UniqueID ID;
2605   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2606     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2607         << PLoc.getFilename() << EC.message();
2608 
2609   DeviceID = ID.getDevice();
2610   FileID = ID.getFile();
2611   LineNum = PLoc.getLine();
2612 }
2613 
2614 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2615                                                      llvm::GlobalVariable *Addr,
2616                                                      bool PerformInit) {
2617   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2618       isDeclareTargetDeclaration(VD);
2619   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2620     return false;
2621   VD = VD->getDefinition(CGM.getContext());
2622   if (VD && !DeclareTargetWithDefinition.insert(VD).second)
2623     return CGM.getLangOpts().OpenMPIsDevice;
2624 
2625   QualType ASTTy = VD->getType();
2626 
2627   SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
2628   // Produce the unique prefix to identify the new target regions. We use
2629   // the source location of the variable declaration which we know to not
2630   // conflict with any target region.
2631   unsigned DeviceID;
2632   unsigned FileID;
2633   unsigned Line;
2634   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2635   SmallString<128> Buffer, Out;
2636   {
2637     llvm::raw_svector_ostream OS(Buffer);
2638     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2639        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2640   }
2641 
2642   const Expr *Init = VD->getAnyInitializer();
2643   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2644     llvm::Constant *Ctor;
2645     llvm::Constant *ID;
2646     if (CGM.getLangOpts().OpenMPIsDevice) {
2647       // Generate function that re-emits the declaration's initializer into
2648       // the threadprivate copy of the variable VD
2649       CodeGenFunction CtorCGF(CGM);
2650 
2651       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2652       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2653       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2654           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2655       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2656       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2657                             FunctionArgList(), Loc, Loc);
2658       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2659       CtorCGF.EmitAnyExprToMem(Init,
2660                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2661                                Init->getType().getQualifiers(),
2662                                /*IsInitializer=*/true);
2663       CtorCGF.FinishFunction();
2664       Ctor = Fn;
2665       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2666     } else {
2667       Ctor = new llvm::GlobalVariable(
2668           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2669           llvm::GlobalValue::PrivateLinkage,
2670           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2671       ID = Ctor;
2672     }
2673 
2674     // Register the information for the entry associated with the constructor.
2675     Out.clear();
2676     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2677         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2678         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2679   }
2680   if (VD->getType().isDestructedType() != QualType::DK_none) {
2681     llvm::Constant *Dtor;
2682     llvm::Constant *ID;
2683     if (CGM.getLangOpts().OpenMPIsDevice) {
2684       // Generate function that emits destructor call for the threadprivate
2685       // copy of the variable VD
2686       CodeGenFunction DtorCGF(CGM);
2687 
2688       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2689       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2690       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2691           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2692       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2693       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2694                             FunctionArgList(), Loc, Loc);
2695       // Create a scope with an artificial location for the body of this
2696       // function.
2697       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2698       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2699                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2700                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2701       DtorCGF.FinishFunction();
2702       Dtor = Fn;
2703       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2704     } else {
2705       Dtor = new llvm::GlobalVariable(
2706           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2707           llvm::GlobalValue::PrivateLinkage,
2708           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2709       ID = Dtor;
2710     }
2711     // Register the information for the entry associated with the destructor.
2712     Out.clear();
2713     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2714         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2715         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2716   }
2717   return CGM.getLangOpts().OpenMPIsDevice;
2718 }
2719 
2720 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2721                                                           QualType VarType,
2722                                                           StringRef Name) {
2723   llvm::Twine VarName(Name, ".artificial.");
2724   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2725   llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
2726   llvm::Value *Args[] = {
2727       emitUpdateLocation(CGF, SourceLocation()),
2728       getThreadID(CGF, SourceLocation()),
2729       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2730       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2731                                 /*IsSigned=*/false),
2732       getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2733   return Address(
2734       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2735           CGF.EmitRuntimeCall(
2736               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2737           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2738       CGM.getPointerAlign());
2739 }
2740 
2741 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
2742 /// function. Here is the logic:
2743 /// if (Cond) {
2744 ///   ThenGen();
2745 /// } else {
2746 ///   ElseGen();
2747 /// }
2748 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2749                                       const RegionCodeGenTy &ThenGen,
2750                                       const RegionCodeGenTy &ElseGen) {
2751   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2752 
2753   // If the condition constant folds and can be elided, try to avoid emitting
2754   // the condition and the dead arm of the if/else.
2755   bool CondConstant;
2756   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2757     if (CondConstant)
2758       ThenGen(CGF);
2759     else
2760       ElseGen(CGF);
2761     return;
2762   }
2763 
2764   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2765   // emit the conditional branch.
2766   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2767   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2768   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2769   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2770 
2771   // Emit the 'then' code.
2772   CGF.EmitBlock(ThenBlock);
2773   ThenGen(CGF);
2774   CGF.EmitBranch(ContBlock);
2775   // Emit the 'else' code if present.
2776   // There is no need to emit line number for unconditional branch.
2777   (void)ApplyDebugLocation::CreateEmpty(CGF);
2778   CGF.EmitBlock(ElseBlock);
2779   ElseGen(CGF);
2780   // There is no need to emit line number for unconditional branch.
2781   (void)ApplyDebugLocation::CreateEmpty(CGF);
2782   CGF.EmitBranch(ContBlock);
2783   // Emit the continuation block for code after the if.
2784   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2785 }
2786 
2787 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2788                                        llvm::Value *OutlinedFn,
2789                                        ArrayRef<llvm::Value *> CapturedVars,
2790                                        const Expr *IfCond) {
2791   if (!CGF.HaveInsertPoint())
2792     return;
2793   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2794   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2795                                                      PrePostActionTy &) {
2796     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2797     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2798     llvm::Value *Args[] = {
2799         RTLoc,
2800         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2801         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2802     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2803     RealArgs.append(std::begin(Args), std::end(Args));
2804     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2805 
2806     llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2807     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2808   };
2809   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2810                                                           PrePostActionTy &) {
2811     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2812     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2813     // Build calls:
2814     // __kmpc_serialized_parallel(&Loc, GTid);
2815     llvm::Value *Args[] = {RTLoc, ThreadID};
2816     CGF.EmitRuntimeCall(
2817         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2818 
2819     // OutlinedFn(&GTid, &zero, CapturedStruct);
2820     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2821     Address ZeroAddr =
2822         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
2823                              /*Name*/ ".zero.addr");
2824     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2825     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2826     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2827     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2828     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2829     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2830 
2831     // __kmpc_end_serialized_parallel(&Loc, GTid);
2832     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2833     CGF.EmitRuntimeCall(
2834         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2835         EndArgs);
2836   };
2837   if (IfCond) {
2838     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2839   } else {
2840     RegionCodeGenTy ThenRCG(ThenGen);
2841     ThenRCG(CGF);
2842   }
2843 }
2844 
2845 // If we're inside an (outlined) parallel region, use the region info's
2846 // thread-ID variable (it is passed in a first argument of the outlined function
2847 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2848 // regular serial code region, get thread ID by calling kmp_int32
2849 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2850 // return the address of that temp.
2851 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2852                                              SourceLocation Loc) {
2853   if (auto *OMPRegionInfo =
2854           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2855     if (OMPRegionInfo->getThreadIDVariable())
2856       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2857 
2858   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2859   QualType Int32Ty =
2860       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2861   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2862   CGF.EmitStoreOfScalar(ThreadID,
2863                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2864 
2865   return ThreadIDTemp;
2866 }
2867 
2868 llvm::Constant *
2869 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2870                                              const llvm::Twine &Name) {
2871   SmallString<256> Buffer;
2872   llvm::raw_svector_ostream Out(Buffer);
2873   Out << Name;
2874   StringRef RuntimeName = Out.str();
2875   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2876   if (Elem.second) {
2877     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2878            "OMP internal variable has different type than requested");
2879     return &*Elem.second;
2880   }
2881 
2882   return Elem.second = new llvm::GlobalVariable(
2883              CGM.getModule(), Ty, /*IsConstant*/ false,
2884              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2885              Elem.first());
2886 }
2887 
2888 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2889   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2890   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2891 }
2892 
2893 namespace {
2894 /// Common pre(post)-action for different OpenMP constructs.
2895 class CommonActionTy final : public PrePostActionTy {
2896   llvm::Value *EnterCallee;
2897   ArrayRef<llvm::Value *> EnterArgs;
2898   llvm::Value *ExitCallee;
2899   ArrayRef<llvm::Value *> ExitArgs;
2900   bool Conditional;
2901   llvm::BasicBlock *ContBlock = nullptr;
2902 
2903 public:
2904   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2905                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2906                  bool Conditional = false)
2907       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2908         ExitArgs(ExitArgs), Conditional(Conditional) {}
2909   void Enter(CodeGenFunction &CGF) override {
2910     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2911     if (Conditional) {
2912       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2913       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2914       ContBlock = CGF.createBasicBlock("omp_if.end");
2915       // Generate the branch (If-stmt)
2916       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2917       CGF.EmitBlock(ThenBlock);
2918     }
2919   }
2920   void Done(CodeGenFunction &CGF) {
2921     // Emit the rest of blocks/branches
2922     CGF.EmitBranch(ContBlock);
2923     CGF.EmitBlock(ContBlock, true);
2924   }
2925   void Exit(CodeGenFunction &CGF) override {
2926     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2927   }
2928 };
2929 } // anonymous namespace
2930 
2931 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2932                                          StringRef CriticalName,
2933                                          const RegionCodeGenTy &CriticalOpGen,
2934                                          SourceLocation Loc, const Expr *Hint) {
2935   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2936   // CriticalOpGen();
2937   // __kmpc_end_critical(ident_t *, gtid, Lock);
2938   // Prepare arguments and build a call to __kmpc_critical
2939   if (!CGF.HaveInsertPoint())
2940     return;
2941   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2942                          getCriticalRegionLock(CriticalName)};
2943   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2944                                                 std::end(Args));
2945   if (Hint) {
2946     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2947         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2948   }
2949   CommonActionTy Action(
2950       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2951                                  : OMPRTL__kmpc_critical),
2952       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2953   CriticalOpGen.setAction(Action);
2954   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2955 }
2956 
2957 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2958                                        const RegionCodeGenTy &MasterOpGen,
2959                                        SourceLocation Loc) {
2960   if (!CGF.HaveInsertPoint())
2961     return;
2962   // if(__kmpc_master(ident_t *, gtid)) {
2963   //   MasterOpGen();
2964   //   __kmpc_end_master(ident_t *, gtid);
2965   // }
2966   // Prepare arguments and build a call to __kmpc_master
2967   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2968   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2969                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2970                         /*Conditional=*/true);
2971   MasterOpGen.setAction(Action);
2972   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2973   Action.Done(CGF);
2974 }
2975 
2976 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2977                                         SourceLocation Loc) {
2978   if (!CGF.HaveInsertPoint())
2979     return;
2980   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2981   llvm::Value *Args[] = {
2982       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2983       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2984   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2985   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2986     Region->emitUntiedSwitch(CGF);
2987 }
2988 
2989 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2990                                           const RegionCodeGenTy &TaskgroupOpGen,
2991                                           SourceLocation Loc) {
2992   if (!CGF.HaveInsertPoint())
2993     return;
2994   // __kmpc_taskgroup(ident_t *, gtid);
2995   // TaskgroupOpGen();
2996   // __kmpc_end_taskgroup(ident_t *, gtid);
2997   // Prepare arguments and build a call to __kmpc_taskgroup
2998   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2999   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3000                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3001                         Args);
3002   TaskgroupOpGen.setAction(Action);
3003   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3004 }
3005 
3006 /// Given an array of pointers to variables, project the address of a
3007 /// given variable.
3008 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3009                                       unsigned Index, const VarDecl *Var) {
3010   // Pull out the pointer to the variable.
3011   Address PtrAddr =
3012       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
3013   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3014 
3015   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3016   Addr = CGF.Builder.CreateElementBitCast(
3017       Addr, CGF.ConvertTypeForMem(Var->getType()));
3018   return Addr;
3019 }
3020 
3021 static llvm::Value *emitCopyprivateCopyFunction(
3022     CodeGenModule &CGM, llvm::Type *ArgsType,
3023     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3024     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3025     SourceLocation Loc) {
3026   ASTContext &C = CGM.getContext();
3027   // void copy_func(void *LHSArg, void *RHSArg);
3028   FunctionArgList Args;
3029   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3030                            ImplicitParamDecl::Other);
3031   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3032                            ImplicitParamDecl::Other);
3033   Args.push_back(&LHSArg);
3034   Args.push_back(&RHSArg);
3035   const auto &CGFI =
3036       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3037   auto *Fn = llvm::Function::Create(
3038       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
3039       ".omp.copyprivate.copy_func", &CGM.getModule());
3040   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3041   Fn->setDoesNotRecurse();
3042   CodeGenFunction CGF(CGM);
3043   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3044   // Dest = (void*[n])(LHSArg);
3045   // Src = (void*[n])(RHSArg);
3046   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3047       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3048       ArgsType), CGF.getPointerAlign());
3049   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3050       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3051       ArgsType), CGF.getPointerAlign());
3052   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3053   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3054   // ...
3055   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3056   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3057     const auto *DestVar =
3058         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3059     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3060 
3061     const auto *SrcVar =
3062         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3063     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3064 
3065     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3066     QualType Type = VD->getType();
3067     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3068   }
3069   CGF.FinishFunction();
3070   return Fn;
3071 }
3072 
3073 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3074                                        const RegionCodeGenTy &SingleOpGen,
3075                                        SourceLocation Loc,
3076                                        ArrayRef<const Expr *> CopyprivateVars,
3077                                        ArrayRef<const Expr *> SrcExprs,
3078                                        ArrayRef<const Expr *> DstExprs,
3079                                        ArrayRef<const Expr *> AssignmentOps) {
3080   if (!CGF.HaveInsertPoint())
3081     return;
3082   assert(CopyprivateVars.size() == SrcExprs.size() &&
3083          CopyprivateVars.size() == DstExprs.size() &&
3084          CopyprivateVars.size() == AssignmentOps.size());
3085   ASTContext &C = CGM.getContext();
3086   // int32 did_it = 0;
3087   // if(__kmpc_single(ident_t *, gtid)) {
3088   //   SingleOpGen();
3089   //   __kmpc_end_single(ident_t *, gtid);
3090   //   did_it = 1;
3091   // }
3092   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3093   // <copy_func>, did_it);
3094 
3095   Address DidIt = Address::invalid();
3096   if (!CopyprivateVars.empty()) {
3097     // int32 did_it = 0;
3098     QualType KmpInt32Ty =
3099         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3100     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3101     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3102   }
3103   // Prepare arguments and build a call to __kmpc_single
3104   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3105   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3106                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3107                         /*Conditional=*/true);
3108   SingleOpGen.setAction(Action);
3109   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3110   if (DidIt.isValid()) {
3111     // did_it = 1;
3112     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3113   }
3114   Action.Done(CGF);
3115   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3116   // <copy_func>, did_it);
3117   if (DidIt.isValid()) {
3118     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3119     QualType CopyprivateArrayTy =
3120         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3121                                /*IndexTypeQuals=*/0);
3122     // Create a list of all private variables for copyprivate.
3123     Address CopyprivateList =
3124         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3125     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3126       Address Elem = CGF.Builder.CreateConstArrayGEP(
3127           CopyprivateList, I, CGF.getPointerSize());
3128       CGF.Builder.CreateStore(
3129           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3130               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3131           Elem);
3132     }
3133     // Build function that copies private values from single region to all other
3134     // threads in the corresponding parallel region.
3135     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3136         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3137         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3138     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3139     Address CL =
3140       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3141                                                       CGF.VoidPtrTy);
3142     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3143     llvm::Value *Args[] = {
3144         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3145         getThreadID(CGF, Loc),        // i32 <gtid>
3146         BufSize,                      // size_t <buf_size>
3147         CL.getPointer(),              // void *<copyprivate list>
3148         CpyFn,                        // void (*) (void *, void *) <copy_func>
3149         DidItVal                      // i32 did_it
3150     };
3151     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3152   }
3153 }
3154 
3155 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3156                                         const RegionCodeGenTy &OrderedOpGen,
3157                                         SourceLocation Loc, bool IsThreads) {
3158   if (!CGF.HaveInsertPoint())
3159     return;
3160   // __kmpc_ordered(ident_t *, gtid);
3161   // OrderedOpGen();
3162   // __kmpc_end_ordered(ident_t *, gtid);
3163   // Prepare arguments and build a call to __kmpc_ordered
3164   if (IsThreads) {
3165     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3166     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3167                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3168                           Args);
3169     OrderedOpGen.setAction(Action);
3170     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3171     return;
3172   }
3173   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3174 }
3175 
3176 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3177                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3178                                       bool ForceSimpleCall) {
3179   if (!CGF.HaveInsertPoint())
3180     return;
3181   // Build call __kmpc_cancel_barrier(loc, thread_id);
3182   // Build call __kmpc_barrier(loc, thread_id);
3183   unsigned Flags;
3184   if (Kind == OMPD_for)
3185     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3186   else if (Kind == OMPD_sections)
3187     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3188   else if (Kind == OMPD_single)
3189     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3190   else if (Kind == OMPD_barrier)
3191     Flags = OMP_IDENT_BARRIER_EXPL;
3192   else
3193     Flags = OMP_IDENT_BARRIER_IMPL;
3194   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3195   // thread_id);
3196   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3197                          getThreadID(CGF, Loc)};
3198   if (auto *OMPRegionInfo =
3199           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3200     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3201       llvm::Value *Result = CGF.EmitRuntimeCall(
3202           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3203       if (EmitChecks) {
3204         // if (__kmpc_cancel_barrier()) {
3205         //   exit from construct;
3206         // }
3207         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3208         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3209         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3210         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3211         CGF.EmitBlock(ExitBB);
3212         //   exit from construct;
3213         CodeGenFunction::JumpDest CancelDestination =
3214             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3215         CGF.EmitBranchThroughCleanup(CancelDestination);
3216         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3217       }
3218       return;
3219     }
3220   }
3221   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3222 }
3223 
3224 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
3225 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3226                                           bool Chunked, bool Ordered) {
3227   switch (ScheduleKind) {
3228   case OMPC_SCHEDULE_static:
3229     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3230                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3231   case OMPC_SCHEDULE_dynamic:
3232     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3233   case OMPC_SCHEDULE_guided:
3234     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3235   case OMPC_SCHEDULE_runtime:
3236     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3237   case OMPC_SCHEDULE_auto:
3238     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3239   case OMPC_SCHEDULE_unknown:
3240     assert(!Chunked && "chunk was specified but schedule kind not known");
3241     return Ordered ? OMP_ord_static : OMP_sch_static;
3242   }
3243   llvm_unreachable("Unexpected runtime schedule");
3244 }
3245 
3246 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
3247 static OpenMPSchedType
3248 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3249   // only static is allowed for dist_schedule
3250   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3251 }
3252 
3253 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3254                                          bool Chunked) const {
3255   OpenMPSchedType Schedule =
3256       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3257   return Schedule == OMP_sch_static;
3258 }
3259 
3260 bool CGOpenMPRuntime::isStaticNonchunked(
3261     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3262   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3263   return Schedule == OMP_dist_sch_static;
3264 }
3265 
3266 
3267 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3268   OpenMPSchedType Schedule =
3269       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3270   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3271   return Schedule != OMP_sch_static;
3272 }
3273 
3274 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3275                                   OpenMPScheduleClauseModifier M1,
3276                                   OpenMPScheduleClauseModifier M2) {
3277   int Modifier = 0;
3278   switch (M1) {
3279   case OMPC_SCHEDULE_MODIFIER_monotonic:
3280     Modifier = OMP_sch_modifier_monotonic;
3281     break;
3282   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3283     Modifier = OMP_sch_modifier_nonmonotonic;
3284     break;
3285   case OMPC_SCHEDULE_MODIFIER_simd:
3286     if (Schedule == OMP_sch_static_chunked)
3287       Schedule = OMP_sch_static_balanced_chunked;
3288     break;
3289   case OMPC_SCHEDULE_MODIFIER_last:
3290   case OMPC_SCHEDULE_MODIFIER_unknown:
3291     break;
3292   }
3293   switch (M2) {
3294   case OMPC_SCHEDULE_MODIFIER_monotonic:
3295     Modifier = OMP_sch_modifier_monotonic;
3296     break;
3297   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3298     Modifier = OMP_sch_modifier_nonmonotonic;
3299     break;
3300   case OMPC_SCHEDULE_MODIFIER_simd:
3301     if (Schedule == OMP_sch_static_chunked)
3302       Schedule = OMP_sch_static_balanced_chunked;
3303     break;
3304   case OMPC_SCHEDULE_MODIFIER_last:
3305   case OMPC_SCHEDULE_MODIFIER_unknown:
3306     break;
3307   }
3308   return Schedule | Modifier;
3309 }
3310 
3311 void CGOpenMPRuntime::emitForDispatchInit(
3312     CodeGenFunction &CGF, SourceLocation Loc,
3313     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3314     bool Ordered, const DispatchRTInput &DispatchValues) {
3315   if (!CGF.HaveInsertPoint())
3316     return;
3317   OpenMPSchedType Schedule = getRuntimeSchedule(
3318       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3319   assert(Ordered ||
3320          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3321           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3322           Schedule != OMP_sch_static_balanced_chunked));
3323   // Call __kmpc_dispatch_init(
3324   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3325   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3326   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3327 
3328   // If the Chunk was not specified in the clause - use default value 1.
3329   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3330                                             : CGF.Builder.getIntN(IVSize, 1);
3331   llvm::Value *Args[] = {
3332       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3333       CGF.Builder.getInt32(addMonoNonMonoModifier(
3334           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3335       DispatchValues.LB,                                // Lower
3336       DispatchValues.UB,                                // Upper
3337       CGF.Builder.getIntN(IVSize, 1),                   // Stride
3338       Chunk                                             // Chunk
3339   };
3340   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3341 }
3342 
3343 static void emitForStaticInitCall(
3344     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3345     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
3346     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3347     const CGOpenMPRuntime::StaticRTInput &Values) {
3348   if (!CGF.HaveInsertPoint())
3349     return;
3350 
3351   assert(!Values.Ordered);
3352   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3353          Schedule == OMP_sch_static_balanced_chunked ||
3354          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3355          Schedule == OMP_dist_sch_static ||
3356          Schedule == OMP_dist_sch_static_chunked);
3357 
3358   // Call __kmpc_for_static_init(
3359   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3360   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3361   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3362   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3363   llvm::Value *Chunk = Values.Chunk;
3364   if (Chunk == nullptr) {
3365     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3366             Schedule == OMP_dist_sch_static) &&
3367            "expected static non-chunked schedule");
3368     // If the Chunk was not specified in the clause - use default value 1.
3369     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3370   } else {
3371     assert((Schedule == OMP_sch_static_chunked ||
3372             Schedule == OMP_sch_static_balanced_chunked ||
3373             Schedule == OMP_ord_static_chunked ||
3374             Schedule == OMP_dist_sch_static_chunked) &&
3375            "expected static chunked schedule");
3376   }
3377   llvm::Value *Args[] = {
3378       UpdateLocation,
3379       ThreadId,
3380       CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3381                                                   M2)), // Schedule type
3382       Values.IL.getPointer(),                           // &isLastIter
3383       Values.LB.getPointer(),                           // &LB
3384       Values.UB.getPointer(),                           // &UB
3385       Values.ST.getPointer(),                           // &Stride
3386       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3387       Chunk                                             // Chunk
3388   };
3389   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3390 }
3391 
3392 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3393                                         SourceLocation Loc,
3394                                         OpenMPDirectiveKind DKind,
3395                                         const OpenMPScheduleTy &ScheduleKind,
3396                                         const StaticRTInput &Values) {
3397   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3398       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3399   assert(isOpenMPWorksharingDirective(DKind) &&
3400          "Expected loop-based or sections-based directive.");
3401   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3402                                              isOpenMPLoopDirective(DKind)
3403                                                  ? OMP_IDENT_WORK_LOOP
3404                                                  : OMP_IDENT_WORK_SECTIONS);
3405   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3406   llvm::Constant *StaticInitFunction =
3407       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3408   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3409                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3410 }
3411 
3412 void CGOpenMPRuntime::emitDistributeStaticInit(
3413     CodeGenFunction &CGF, SourceLocation Loc,
3414     OpenMPDistScheduleClauseKind SchedKind,
3415     const CGOpenMPRuntime::StaticRTInput &Values) {
3416   OpenMPSchedType ScheduleNum =
3417       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3418   llvm::Value *UpdatedLocation =
3419       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3420   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3421   llvm::Constant *StaticInitFunction =
3422       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3423   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3424                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3425                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3426 }
3427 
3428 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3429                                           SourceLocation Loc,
3430                                           OpenMPDirectiveKind DKind) {
3431   if (!CGF.HaveInsertPoint())
3432     return;
3433   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3434   llvm::Value *Args[] = {
3435       emitUpdateLocation(CGF, Loc,
3436                          isOpenMPDistributeDirective(DKind)
3437                              ? OMP_IDENT_WORK_DISTRIBUTE
3438                              : isOpenMPLoopDirective(DKind)
3439                                    ? OMP_IDENT_WORK_LOOP
3440                                    : OMP_IDENT_WORK_SECTIONS),
3441       getThreadID(CGF, Loc)};
3442   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3443                       Args);
3444 }
3445 
3446 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3447                                                  SourceLocation Loc,
3448                                                  unsigned IVSize,
3449                                                  bool IVSigned) {
3450   if (!CGF.HaveInsertPoint())
3451     return;
3452   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3453   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3454   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3455 }
3456 
3457 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3458                                           SourceLocation Loc, unsigned IVSize,
3459                                           bool IVSigned, Address IL,
3460                                           Address LB, Address UB,
3461                                           Address ST) {
3462   // Call __kmpc_dispatch_next(
3463   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3464   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3465   //          kmp_int[32|64] *p_stride);
3466   llvm::Value *Args[] = {
3467       emitUpdateLocation(CGF, Loc),
3468       getThreadID(CGF, Loc),
3469       IL.getPointer(), // &isLastIter
3470       LB.getPointer(), // &Lower
3471       UB.getPointer(), // &Upper
3472       ST.getPointer()  // &Stride
3473   };
3474   llvm::Value *Call =
3475       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3476   return CGF.EmitScalarConversion(
3477       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3478       CGF.getContext().BoolTy, Loc);
3479 }
3480 
3481 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3482                                            llvm::Value *NumThreads,
3483                                            SourceLocation Loc) {
3484   if (!CGF.HaveInsertPoint())
3485     return;
3486   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3487   llvm::Value *Args[] = {
3488       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3489       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3490   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3491                       Args);
3492 }
3493 
3494 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3495                                          OpenMPProcBindClauseKind ProcBind,
3496                                          SourceLocation Loc) {
3497   if (!CGF.HaveInsertPoint())
3498     return;
3499   // Constants for proc bind value accepted by the runtime.
3500   enum ProcBindTy {
3501     ProcBindFalse = 0,
3502     ProcBindTrue,
3503     ProcBindMaster,
3504     ProcBindClose,
3505     ProcBindSpread,
3506     ProcBindIntel,
3507     ProcBindDefault
3508   } RuntimeProcBind;
3509   switch (ProcBind) {
3510   case OMPC_PROC_BIND_master:
3511     RuntimeProcBind = ProcBindMaster;
3512     break;
3513   case OMPC_PROC_BIND_close:
3514     RuntimeProcBind = ProcBindClose;
3515     break;
3516   case OMPC_PROC_BIND_spread:
3517     RuntimeProcBind = ProcBindSpread;
3518     break;
3519   case OMPC_PROC_BIND_unknown:
3520     llvm_unreachable("Unsupported proc_bind value.");
3521   }
3522   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3523   llvm::Value *Args[] = {
3524       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3525       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3526   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3527 }
3528 
3529 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3530                                 SourceLocation Loc) {
3531   if (!CGF.HaveInsertPoint())
3532     return;
3533   // Build call void __kmpc_flush(ident_t *loc)
3534   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3535                       emitUpdateLocation(CGF, Loc));
3536 }
3537 
namespace {
/// Field indexes of the kmp_task_t record, in declaration order.
enum KmpTaskTFields {
  /// Index 0: list of shared variables.
  KmpTaskTShareds = 0,
  /// Index 1: task routine.
  KmpTaskTRoutine,
  /// Index 2: partition id for the untied tasks.
  KmpTaskTPartId,
  /// Index 3: function with call of destructors for private variables.
  Data1,
  /// Index 4: task priority.
  Data2,
  /// Index 5 (taskloops only): lower bound.
  KmpTaskTLowerBound,
  /// Index 6 (taskloops only): upper bound.
  KmpTaskTUpperBound,
  /// Index 7 (taskloops only): stride.
  KmpTaskTStride,
  /// Index 8 (taskloops only): is-last-iteration flag.
  KmpTaskTLastIter,
  /// Index 9 (taskloops only): reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3563 
3564 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3565   return OffloadEntriesTargetRegion.empty() &&
3566          OffloadEntriesDeviceGlobalVar.empty();
3567 }
3568 
3569 /// \brief Initialize target region entry.
3570 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3571     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3572                                     StringRef ParentName, unsigned LineNum,
3573                                     unsigned Order) {
3574   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3575                                              "only required for the device "
3576                                              "code generation.");
3577   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3578       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3579                                    OMPTargetRegionEntryTargetRegion);
3580   ++OffloadingEntriesNum;
3581 }
3582 
3583 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3584     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3585                                   StringRef ParentName, unsigned LineNum,
3586                                   llvm::Constant *Addr, llvm::Constant *ID,
3587                                   OMPTargetRegionEntryKind Flags) {
3588   // If we are emitting code for a target, the entry is already initialized,
3589   // only has to be registered.
3590   if (CGM.getLangOpts().OpenMPIsDevice) {
3591     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3592       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3593           DiagnosticsEngine::Error,
3594           "Unable to find target region on line '%0' in the device code.");
3595       CGM.getDiags().Report(DiagID) << LineNum;
3596       return;
3597     }
3598     auto &Entry =
3599         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3600     assert(Entry.isValid() && "Entry not initialized!");
3601     Entry.setAddress(Addr);
3602     Entry.setID(ID);
3603     Entry.setFlags(Flags);
3604   } else {
3605     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3606     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3607     ++OffloadingEntriesNum;
3608   }
3609 }
3610 
3611 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3612     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3613     unsigned LineNum) const {
3614   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3615   if (PerDevice == OffloadEntriesTargetRegion.end())
3616     return false;
3617   auto PerFile = PerDevice->second.find(FileID);
3618   if (PerFile == PerDevice->second.end())
3619     return false;
3620   auto PerParentName = PerFile->second.find(ParentName);
3621   if (PerParentName == PerFile->second.end())
3622     return false;
3623   auto PerLine = PerParentName->second.find(LineNum);
3624   if (PerLine == PerParentName->second.end())
3625     return false;
3626   // Fail if this entry is already registered.
3627   if (PerLine->second.getAddress() || PerLine->second.getID())
3628     return false;
3629   return true;
3630 }
3631 
3632 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3633     const OffloadTargetRegionEntryInfoActTy &Action) {
3634   // Scan all target region entries and perform the provided action.
3635   for (const auto &D : OffloadEntriesTargetRegion)
3636     for (const auto &F : D.second)
3637       for (const auto &P : F.second)
3638         for (const auto &L : P.second)
3639           Action(D.first, F.first, P.first(), L.first, L.second);
3640 }
3641 
3642 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3643     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3644                                        OMPTargetGlobalVarEntryKind Flags,
3645                                        unsigned Order) {
3646   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3647                                              "only required for the device "
3648                                              "code generation.");
3649   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3650   ++OffloadingEntriesNum;
3651 }
3652 
3653 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3654     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3655                                      CharUnits VarSize,
3656                                      OMPTargetGlobalVarEntryKind Flags,
3657                                      llvm::GlobalValue::LinkageTypes Linkage) {
3658   if (CGM.getLangOpts().OpenMPIsDevice) {
3659     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3660     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3661            "Entry not initialized!");
3662     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3663            "Resetting with the new address.");
3664     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
3665       return;
3666     Entry.setAddress(Addr);
3667     Entry.setVarSize(VarSize);
3668     Entry.setLinkage(Linkage);
3669   } else {
3670     if (hasDeviceGlobalVarEntryInfo(VarName))
3671       return;
3672     OffloadEntriesDeviceGlobalVar.try_emplace(
3673         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3674     ++OffloadingEntriesNum;
3675   }
3676 }
3677 
3678 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3679     actOnDeviceGlobalVarEntriesInfo(
3680         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3681   // Scan all target region entries and perform the provided action.
3682   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3683     Action(E.getKey(), E.getValue());
3684 }
3685 
3686 llvm::Function *
3687 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3688   // If we don't have entries or if we are emitting code for the device, we
3689   // don't need to do anything.
3690   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3691     return nullptr;
3692 
3693   llvm::Module &M = CGM.getModule();
3694   ASTContext &C = CGM.getContext();
3695 
3696   // Get list of devices we care about
3697   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3698 
3699   // We should be creating an offloading descriptor only if there are devices
3700   // specified.
3701   assert(!Devices.empty() && "No OpenMP offloading devices??");
3702 
3703   // Create the external variables that will point to the begin and end of the
3704   // host entries section. These will be defined by the linker.
3705   llvm::Type *OffloadEntryTy =
3706       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3707   auto *HostEntriesBegin = new llvm::GlobalVariable(
3708       M, OffloadEntryTy, /*isConstant=*/true,
3709       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3710       ".omp_offloading.entries_begin");
3711   auto *HostEntriesEnd = new llvm::GlobalVariable(
3712       M, OffloadEntryTy, /*isConstant=*/true,
3713       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3714       ".omp_offloading.entries_end");
3715 
3716   // Create all device images
3717   auto *DeviceImageTy = cast<llvm::StructType>(
3718       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3719   ConstantInitBuilder DeviceImagesBuilder(CGM);
3720   ConstantArrayBuilder DeviceImagesEntries =
3721       DeviceImagesBuilder.beginArray(DeviceImageTy);
3722 
3723   for (const llvm::Triple &Device : Devices) {
3724     StringRef T = Device.getTriple();
3725     auto *ImgBegin = new llvm::GlobalVariable(
3726         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3727         /*Initializer=*/nullptr, Twine(".omp_offloading.img_start.", T));
3728     auto *ImgEnd = new llvm::GlobalVariable(
3729         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3730         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.", T));
3731 
3732     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3733                               HostEntriesEnd};
3734     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3735                                              DeviceImagesEntries);
3736   }
3737 
3738   // Create device images global array.
3739   llvm::GlobalVariable *DeviceImages =
3740     DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
3741                                               CGM.getPointerAlign(),
3742                                               /*isConstant=*/true);
3743   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3744 
3745   // This is a Zero array to be used in the creation of the constant expressions
3746   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3747                              llvm::Constant::getNullValue(CGM.Int32Ty)};
3748 
3749   // Create the target region descriptor.
3750   llvm::Constant *Data[] = {
3751       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3752       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3753                                            DeviceImages, Index),
3754       HostEntriesBegin, HostEntriesEnd};
3755   llvm::GlobalVariable *Desc = createConstantGlobalStruct(
3756       CGM, getTgtBinaryDescriptorQTy(), Data, ".omp_offloading.descriptor");
3757 
3758   // Emit code to register or unregister the descriptor at execution
3759   // startup or closing, respectively.
3760 
3761   llvm::Function *UnRegFn;
3762   {
3763     FunctionArgList Args;
3764     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3765     Args.push_back(&DummyPtr);
3766 
3767     CodeGenFunction CGF(CGM);
3768     // Disable debug info for global (de-)initializer because they are not part
3769     // of some particular construct.
3770     CGF.disableDebugInfo();
3771     const auto &FI =
3772         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3773     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3774     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(
3775         FTy, ".omp_offloading.descriptor_unreg", FI);
3776     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3777     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
3778                         Desc);
3779     CGF.FinishFunction();
3780   }
3781   llvm::Function *RegFn;
3782   {
3783     CodeGenFunction CGF(CGM);
3784     // Disable debug info for global (de-)initializer because they are not part
3785     // of some particular construct.
3786     CGF.disableDebugInfo();
3787     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
3788     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3789     RegFn = CGM.CreateGlobalInitOrDestructFunction(
3790         FTy, ".omp_offloading.descriptor_reg", FI);
3791     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3792     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
3793     // Create a variable to drive the registration and unregistration of the
3794     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3795     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3796                                   SourceLocation(), nullptr, C.CharTy,
3797                                   ImplicitParamDecl::Other);
3798     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3799     CGF.FinishFunction();
3800   }
3801   if (CGM.supportsCOMDAT()) {
3802     // It is sufficient to call registration function only once, so create a
3803     // COMDAT group for registration/unregistration functions and associated
3804     // data. That would reduce startup time and code size. Registration
3805     // function serves as a COMDAT group key.
3806     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
3807     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3808     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3809     RegFn->setComdat(ComdatKey);
3810     UnRegFn->setComdat(ComdatKey);
3811     DeviceImages->setComdat(ComdatKey);
3812     Desc->setComdat(ComdatKey);
3813   }
3814   return RegFn;
3815 }
3816 
3817 void CGOpenMPRuntime::createOffloadEntry(
3818     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3819     llvm::GlobalValue::LinkageTypes Linkage) {
3820   StringRef Name = Addr->getName();
3821   llvm::Module &M = CGM.getModule();
3822   llvm::LLVMContext &C = M.getContext();
3823 
3824   // Create constant string with the name.
3825   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3826 
3827   auto *Str =
3828       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
3829                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
3830                                ".omp_offloading.entry_name");
3831   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3832 
3833   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3834                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3835                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3836                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3837                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3838   llvm::GlobalVariable *Entry = createConstantGlobalStruct(
3839       CGM, getTgtOffloadEntryQTy(), Data, Twine(".omp_offloading.entry.", Name),
3840       Linkage);
3841 
3842   // The entry has to be created in the section the linker expects it to be.
3843   Entry->setSection(".omp_offloading.entries");
3844 }
3845 
3846 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3847   // Emit the offloading entries and metadata so that the device codegen side
3848   // can easily figure out what to emit. The produced metadata looks like
3849   // this:
3850   //
3851   // !omp_offload.info = !{!1, ...}
3852   //
3853   // Right now we only generate metadata for function that contain target
3854   // regions.
3855 
3856   // If we do not have entries, we don't need to do anything.
3857   if (OffloadEntriesInfoManager.empty())
3858     return;
3859 
3860   llvm::Module &M = CGM.getModule();
3861   llvm::LLVMContext &C = M.getContext();
3862   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
3863       OrderedEntries(OffloadEntriesInfoManager.size());
3864 
3865   // Auxiliary methods to create metadata values and strings.
3866   auto &&GetMDInt = [this](unsigned V) {
3867     return llvm::ConstantAsMetadata::get(
3868         llvm::ConstantInt::get(CGM.Int32Ty, V));
3869   };
3870 
3871   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3872 
3873   // Create the offloading info metadata node.
3874   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3875 
3876   // Create function that emits metadata for each target region entry;
3877   auto &&TargetRegionMetadataEmitter =
3878       [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
3879           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3880           unsigned Line,
3881           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3882         // Generate metadata for target regions. Each entry of this metadata
3883         // contains:
3884         // - Entry 0 -> Kind of this type of metadata (0).
3885         // - Entry 1 -> Device ID of the file where the entry was identified.
3886         // - Entry 2 -> File ID of the file where the entry was identified.
3887         // - Entry 3 -> Mangled name of the function where the entry was
3888         // identified.
3889         // - Entry 4 -> Line in the file where the entry was identified.
3890         // - Entry 5 -> Order the entry was created.
3891         // The first element of the metadata node is the kind.
3892         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3893                                  GetMDInt(FileID),      GetMDString(ParentName),
3894                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3895 
3896         // Save this entry in the right position of the ordered entries array.
3897         OrderedEntries[E.getOrder()] = &E;
3898 
3899         // Add metadata to the named metadata node.
3900         MD->addOperand(llvm::MDNode::get(C, Ops));
3901       };
3902 
3903   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3904       TargetRegionMetadataEmitter);
3905 
3906   // Create function that emits metadata for each device global variable entry;
3907   auto &&DeviceGlobalVarMetadataEmitter =
3908       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3909        MD](StringRef MangledName,
3910            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3911                &E) {
3912         // Generate metadata for global variables. Each entry of this metadata
3913         // contains:
3914         // - Entry 0 -> Kind of this type of metadata (1).
3915         // - Entry 1 -> Mangled name of the variable.
3916         // - Entry 2 -> Declare target kind.
3917         // - Entry 3 -> Order the entry was created.
3918         // The first element of the metadata node is the kind.
3919         llvm::Metadata *Ops[] = {
3920             GetMDInt(E.getKind()), GetMDString(MangledName),
3921             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3922 
3923         // Save this entry in the right position of the ordered entries array.
3924         OrderedEntries[E.getOrder()] = &E;
3925 
3926         // Add metadata to the named metadata node.
3927         MD->addOperand(llvm::MDNode::get(C, Ops));
3928       };
3929 
3930   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3931       DeviceGlobalVarMetadataEmitter);
3932 
3933   for (const auto *E : OrderedEntries) {
3934     assert(E && "All ordered entries must exist!");
3935     if (const auto *CE =
3936             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3937                 E)) {
3938       if (!CE->getID() || !CE->getAddress()) {
3939         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3940             DiagnosticsEngine::Error,
3941             "Offloading entry for target region is incorect: either the "
3942             "address or the ID is invalid.");
3943         CGM.getDiags().Report(DiagID);
3944         continue;
3945       }
3946       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3947                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3948     } else if (const auto *CE =
3949                    dyn_cast<OffloadEntriesInfoManagerTy::
3950                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
3951       if (!CE->getAddress()) {
3952         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3953             DiagnosticsEngine::Error,
3954             "Offloading entry for declare target varible is inccorect: the "
3955             "address is invalid.");
3956         CGM.getDiags().Report(DiagID);
3957         continue;
3958       }
3959       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3960                          CE->getVarSize().getQuantity(), CE->getFlags(),
3961                          CE->getLinkage());
3962     } else {
3963       llvm_unreachable("Unsupported entry kind.");
3964     }
3965   }
3966 }
3967 
3968 /// \brief Loads all the offload entries information from the host IR
3969 /// metadata.
3970 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3971   // If we are in target mode, load the metadata from the host IR. This code has
3972   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3973 
3974   if (!CGM.getLangOpts().OpenMPIsDevice)
3975     return;
3976 
3977   if (CGM.getLangOpts().OMPHostIRFile.empty())
3978     return;
3979 
3980   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3981   if (auto EC = Buf.getError()) {
3982     CGM.getDiags().Report(diag::err_cannot_open_file)
3983         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3984     return;
3985   }
3986 
3987   llvm::LLVMContext C;
3988   auto ME = expectedToErrorOrAndEmitErrors(
3989       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3990 
3991   if (auto EC = ME.getError()) {
3992     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3993         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3994     CGM.getDiags().Report(DiagID)
3995         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3996     return;
3997   }
3998 
3999   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4000   if (!MD)
4001     return;
4002 
4003   for (llvm::MDNode *MN : MD->operands()) {
4004     auto &&GetMDInt = [MN](unsigned Idx) {
4005       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4006       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4007     };
4008 
4009     auto &&GetMDString = [MN](unsigned Idx) {
4010       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4011       return V->getString();
4012     };
4013 
4014     switch (GetMDInt(0)) {
4015     default:
4016       llvm_unreachable("Unexpected metadata!");
4017       break;
4018     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4019         OffloadingEntryInfoTargetRegion:
4020       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4021           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4022           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4023           /*Order=*/GetMDInt(5));
4024       break;
4025     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4026         OffloadingEntryInfoDeviceGlobalVar:
4027       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4028           /*MangledName=*/GetMDString(1),
4029           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4030               /*Flags=*/GetMDInt(2)),
4031           /*Order=*/GetMDInt(3));
4032       break;
4033     }
4034   }
4035 }
4036 
4037 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4038   if (!KmpRoutineEntryPtrTy) {
4039     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4040     ASTContext &C = CGM.getContext();
4041     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4042     FunctionProtoType::ExtProtoInfo EPI;
4043     KmpRoutineEntryPtrQTy = C.getPointerType(
4044         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4045     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4046   }
4047 }
4048 
4049 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4050   // Make sure the type of the entry is already created. This is the type we
4051   // have to create:
4052   // struct __tgt_offload_entry{
4053   //   void      *addr;       // Pointer to the offload entry info.
4054   //                          // (function or global)
4055   //   char      *name;       // Name of the function or global.
4056   //   size_t     size;       // Size of the entry info (0 if it a function).
4057   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4058   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4059   // };
4060   if (TgtOffloadEntryQTy.isNull()) {
4061     ASTContext &C = CGM.getContext();
4062     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4063     RD->startDefinition();
4064     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4065     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4066     addFieldToRecordDecl(C, RD, C.getSizeType());
4067     addFieldToRecordDecl(
4068         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4069     addFieldToRecordDecl(
4070         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4071     RD->completeDefinition();
4072     RD->addAttr(PackedAttr::CreateImplicit(C));
4073     TgtOffloadEntryQTy = C.getRecordType(RD);
4074   }
4075   return TgtOffloadEntryQTy;
4076 }
4077 
4078 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4079   // These are the types we need to build:
4080   // struct __tgt_device_image{
4081   // void   *ImageStart;       // Pointer to the target code start.
4082   // void   *ImageEnd;         // Pointer to the target code end.
4083   // // We also add the host entries to the device image, as it may be useful
4084   // // for the target runtime to have access to that information.
4085   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4086   //                                       // the entries.
4087   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4088   //                                       // entries (non inclusive).
4089   // };
4090   if (TgtDeviceImageQTy.isNull()) {
4091     ASTContext &C = CGM.getContext();
4092     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4093     RD->startDefinition();
4094     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4095     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4096     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4097     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4098     RD->completeDefinition();
4099     TgtDeviceImageQTy = C.getRecordType(RD);
4100   }
4101   return TgtDeviceImageQTy;
4102 }
4103 
4104 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4105   // struct __tgt_bin_desc{
4106   //   int32_t              NumDevices;      // Number of devices supported.
4107   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4108   //                                         // (one per device).
4109   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4110   //                                         // entries.
4111   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4112   //                                         // entries (non inclusive).
4113   // };
4114   if (TgtBinaryDescriptorQTy.isNull()) {
4115     ASTContext &C = CGM.getContext();
4116     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4117     RD->startDefinition();
4118     addFieldToRecordDecl(
4119         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4120     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4121     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4122     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4123     RD->completeDefinition();
4124     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4125   }
4126   return TgtBinaryDescriptorQTy;
4127 }
4128 
4129 namespace {
4130 struct PrivateHelpersTy {
4131   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4132                    const VarDecl *PrivateElemInit)
4133       : Original(Original), PrivateCopy(PrivateCopy),
4134         PrivateElemInit(PrivateElemInit) {}
4135   const VarDecl *Original;
4136   const VarDecl *PrivateCopy;
4137   const VarDecl *PrivateElemInit;
4138 };
4139 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4140 } // anonymous namespace
4141 
4142 static RecordDecl *
4143 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4144   if (!Privates.empty()) {
4145     ASTContext &C = CGM.getContext();
4146     // Build struct .kmp_privates_t. {
4147     //         /*  private vars  */
4148     //       };
4149     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4150     RD->startDefinition();
4151     for (const auto &Pair : Privates) {
4152       const VarDecl *VD = Pair.second.Original;
4153       QualType Type = VD->getType().getNonReferenceType();
4154       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4155       if (VD->hasAttrs()) {
4156         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4157              E(VD->getAttrs().end());
4158              I != E; ++I)
4159           FD->addAttr(*I);
4160       }
4161     }
4162     RD->completeDefinition();
4163     return RD;
4164   }
4165   return nullptr;
4166 }
4167 
4168 static RecordDecl *
4169 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4170                          QualType KmpInt32Ty,
4171                          QualType KmpRoutineEntryPointerQTy) {
4172   ASTContext &C = CGM.getContext();
4173   // Build struct kmp_task_t {
4174   //         void *              shareds;
4175   //         kmp_routine_entry_t routine;
4176   //         kmp_int32           part_id;
4177   //         kmp_cmplrdata_t data1;
4178   //         kmp_cmplrdata_t data2;
4179   // For taskloops additional fields:
4180   //         kmp_uint64          lb;
4181   //         kmp_uint64          ub;
4182   //         kmp_int64           st;
4183   //         kmp_int32           liter;
4184   //         void *              reductions;
4185   //       };
4186   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4187   UD->startDefinition();
4188   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4189   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4190   UD->completeDefinition();
4191   QualType KmpCmplrdataTy = C.getRecordType(UD);
4192   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4193   RD->startDefinition();
4194   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4195   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4196   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4197   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4198   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4199   if (isOpenMPTaskLoopDirective(Kind)) {
4200     QualType KmpUInt64Ty =
4201         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4202     QualType KmpInt64Ty =
4203         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4204     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4205     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4206     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4207     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4208     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4209   }
4210   RD->completeDefinition();
4211   return RD;
4212 }
4213 
4214 static RecordDecl *
4215 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4216                                      ArrayRef<PrivateDataTy> Privates) {
4217   ASTContext &C = CGM.getContext();
4218   // Build struct kmp_task_t_with_privates {
4219   //         kmp_task_t task_data;
4220   //         .kmp_privates_t. privates;
4221   //       };
4222   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4223   RD->startDefinition();
4224   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4225   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4226     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4227   RD->completeDefinition();
4228   return RD;
4229 }
4230 
4231 /// \brief Emit a proxy function which accepts kmp_task_t as the second
4232 /// argument.
4233 /// \code
4234 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4235 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4236 ///   For taskloops:
4237 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4238 ///   tt->reductions, tt->shareds);
4239 ///   return 0;
4240 /// }
4241 /// \endcode
4242 static llvm::Value *
4243 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4244                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4245                       QualType KmpTaskTWithPrivatesPtrQTy,
4246                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4247                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
4248                       llvm::Value *TaskPrivatesMap) {
4249   ASTContext &C = CGM.getContext();
4250   FunctionArgList Args;
4251   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4252                             ImplicitParamDecl::Other);
4253   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4254                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4255                                 ImplicitParamDecl::Other);
4256   Args.push_back(&GtidArg);
4257   Args.push_back(&TaskTypeArg);
4258   const auto &TaskEntryFnInfo =
4259       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4260   llvm::FunctionType *TaskEntryTy =
4261       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4262   auto *TaskEntry =
4263       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
4264                              ".omp_task_entry.", &CGM.getModule());
4265   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4266   TaskEntry->setDoesNotRecurse();
4267   CodeGenFunction CGF(CGM);
4268   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4269                     Loc, Loc);
4270 
4271   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4272   // tt,
4273   // For taskloops:
4274   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4275   // tt->task_data.shareds);
4276   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4277       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4278   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4279       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4280       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4281   const auto *KmpTaskTWithPrivatesQTyRD =
4282       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4283   LValue Base =
4284       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4285   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4286   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4287   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4288   llvm::Value *PartidParam = PartIdLVal.getPointer();
4289 
4290   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4291   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4292   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4293       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4294       CGF.ConvertTypeForMem(SharedsPtrTy));
4295 
4296   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4297   llvm::Value *PrivatesParam;
4298   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4299     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4300     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4301         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4302   } else {
4303     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4304   }
4305 
4306   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4307                                TaskPrivatesMap,
4308                                CGF.Builder
4309                                    .CreatePointerBitCastOrAddrSpaceCast(
4310                                        TDBase.getAddress(), CGF.VoidPtrTy)
4311                                    .getPointer()};
4312   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4313                                           std::end(CommonArgs));
4314   if (isOpenMPTaskLoopDirective(Kind)) {
4315     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4316     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4317     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4318     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4319     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4320     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4321     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4322     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4323     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4324     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4325     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4326     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4327     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4328     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4329     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4330     CallArgs.push_back(LBParam);
4331     CallArgs.push_back(UBParam);
4332     CallArgs.push_back(StParam);
4333     CallArgs.push_back(LIParam);
4334     CallArgs.push_back(RParam);
4335   }
4336   CallArgs.push_back(SharedsParam);
4337 
4338   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4339                                                   CallArgs);
4340   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4341                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4342   CGF.FinishFunction();
4343   return TaskEntry;
4344 }
4345 
4346 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4347                                             SourceLocation Loc,
4348                                             QualType KmpInt32Ty,
4349                                             QualType KmpTaskTWithPrivatesPtrQTy,
4350                                             QualType KmpTaskTWithPrivatesQTy) {
4351   ASTContext &C = CGM.getContext();
4352   FunctionArgList Args;
4353   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4354                             ImplicitParamDecl::Other);
4355   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4356                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4357                                 ImplicitParamDecl::Other);
4358   Args.push_back(&GtidArg);
4359   Args.push_back(&TaskTypeArg);
4360   const auto &DestructorFnInfo =
4361       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4362   llvm::FunctionType *DestructorFnTy =
4363       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4364   auto *DestructorFn =
4365       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4366                              ".omp_task_destructor.", &CGM.getModule());
4367   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4368                                     DestructorFnInfo);
4369   DestructorFn->setDoesNotRecurse();
4370   CodeGenFunction CGF(CGM);
4371   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4372                     Args, Loc, Loc);
4373 
4374   LValue Base = CGF.EmitLoadOfPointerLValue(
4375       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4376       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4377   const auto *KmpTaskTWithPrivatesQTyRD =
4378       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4379   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4380   Base = CGF.EmitLValueForField(Base, *FI);
4381   for (const auto *Field :
4382        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4383     if (QualType::DestructionKind DtorKind =
4384             Field->getType().isDestructedType()) {
4385       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4386       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4387     }
4388   }
4389   CGF.FinishFunction();
4390   return DestructorFn;
4391 }
4392 
4393 /// \brief Emit a privates mapping function for correct handling of private and
4394 /// firstprivate variables.
4395 /// \code
4396 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4397 /// **noalias priv1,...,  <tyn> **noalias privn) {
4398 ///   *priv1 = &.privates.priv1;
4399 ///   ...;
4400 ///   *privn = &.privates.privn;
4401 /// }
4402 /// \endcode
4403 static llvm::Value *
4404 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4405                                ArrayRef<const Expr *> PrivateVars,
4406                                ArrayRef<const Expr *> FirstprivateVars,
4407                                ArrayRef<const Expr *> LastprivateVars,
4408                                QualType PrivatesQTy,
4409                                ArrayRef<PrivateDataTy> Privates) {
4410   ASTContext &C = CGM.getContext();
4411   FunctionArgList Args;
4412   ImplicitParamDecl TaskPrivatesArg(
4413       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4414       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4415       ImplicitParamDecl::Other);
4416   Args.push_back(&TaskPrivatesArg);
4417   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4418   unsigned Counter = 1;
4419   for (const Expr *E : PrivateVars) {
4420     Args.push_back(ImplicitParamDecl::Create(
4421         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4422         C.getPointerType(C.getPointerType(E->getType()))
4423             .withConst()
4424             .withRestrict(),
4425         ImplicitParamDecl::Other));
4426     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4427     PrivateVarsPos[VD] = Counter;
4428     ++Counter;
4429   }
4430   for (const Expr *E : FirstprivateVars) {
4431     Args.push_back(ImplicitParamDecl::Create(
4432         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4433         C.getPointerType(C.getPointerType(E->getType()))
4434             .withConst()
4435             .withRestrict(),
4436         ImplicitParamDecl::Other));
4437     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4438     PrivateVarsPos[VD] = Counter;
4439     ++Counter;
4440   }
4441   for (const Expr *E : LastprivateVars) {
4442     Args.push_back(ImplicitParamDecl::Create(
4443         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4444         C.getPointerType(C.getPointerType(E->getType()))
4445             .withConst()
4446             .withRestrict(),
4447         ImplicitParamDecl::Other));
4448     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4449     PrivateVarsPos[VD] = Counter;
4450     ++Counter;
4451   }
4452   const auto &TaskPrivatesMapFnInfo =
4453       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4454   llvm::FunctionType *TaskPrivatesMapTy =
4455       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4456   auto *TaskPrivatesMap = llvm::Function::Create(
4457       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
4458       ".omp_task_privates_map.", &CGM.getModule());
4459   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4460                                     TaskPrivatesMapFnInfo);
4461   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4462   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4463   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4464   CodeGenFunction CGF(CGM);
4465   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4466                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4467 
4468   // *privi = &.privates.privi;
4469   LValue Base = CGF.EmitLoadOfPointerLValue(
4470       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4471       TaskPrivatesArg.getType()->castAs<PointerType>());
4472   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4473   Counter = 0;
4474   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4475     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4476     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4477     LValue RefLVal =
4478         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4479     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4480         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4481     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4482     ++Counter;
4483   }
4484   CGF.FinishFunction();
4485   return TaskPrivatesMap;
4486 }
4487 
4488 static bool stable_sort_comparator(const PrivateDataTy P1,
4489                                    const PrivateDataTy P2) {
4490   return P1.first > P2.first;
4491 }
4492 
4493 /// Emit initialization for private variables in task-based directives.
4494 static void emitPrivatesInit(CodeGenFunction &CGF,
4495                              const OMPExecutableDirective &D,
4496                              Address KmpTaskSharedsPtr, LValue TDBase,
4497                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4498                              QualType SharedsTy, QualType SharedsPtrTy,
4499                              const OMPTaskDataTy &Data,
4500                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4501   ASTContext &C = CGF.getContext();
4502   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4503   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4504   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4505                                  ? OMPD_taskloop
4506                                  : OMPD_task;
4507   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4508   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4509   LValue SrcBase;
4510   bool IsTargetTask =
4511       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4512       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4513   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4514   // PointersArray and SizesArray. The original variables for these arrays are
4515   // not captured and we get their addresses explicitly.
4516   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4517       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4518     SrcBase = CGF.MakeAddrLValue(
4519         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4520             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4521         SharedsTy);
4522   }
4523   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4524   for (const PrivateDataTy &Pair : Privates) {
4525     const VarDecl *VD = Pair.second.PrivateCopy;
4526     const Expr *Init = VD->getAnyInitializer();
4527     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4528                              !CGF.isTrivialInitializer(Init)))) {
4529       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4530       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4531         const VarDecl *OriginalVD = Pair.second.Original;
4532         // Check if the variable is the target-based BasePointersArray,
4533         // PointersArray or SizesArray.
4534         LValue SharedRefLValue;
4535         QualType Type = OriginalVD->getType();
4536         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4537         if (IsTargetTask && !SharedField) {
4538           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4539                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4540                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4541                          ->getNumParams() == 0 &&
4542                  isa<TranslationUnitDecl>(
4543                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4544                          ->getDeclContext()) &&
4545                  "Expected artificial target data variable.");
4546           SharedRefLValue =
4547               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4548         } else {
4549           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4550           SharedRefLValue = CGF.MakeAddrLValue(
4551               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4552               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4553               SharedRefLValue.getTBAAInfo());
4554         }
4555         if (Type->isArrayType()) {
4556           // Initialize firstprivate array.
4557           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4558             // Perform simple memcpy.
4559             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4560           } else {
4561             // Initialize firstprivate array using element-by-element
4562             // initialization.
4563             CGF.EmitOMPAggregateAssign(
4564                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4565                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4566                                                   Address SrcElement) {
4567                   // Clean up any temporaries needed by the initialization.
4568                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4569                   InitScope.addPrivate(
4570                       Elem, [SrcElement]() -> Address { return SrcElement; });
4571                   (void)InitScope.Privatize();
4572                   // Emit initialization for single element.
4573                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4574                       CGF, &CapturesInfo);
4575                   CGF.EmitAnyExprToMem(Init, DestElement,
4576                                        Init->getType().getQualifiers(),
4577                                        /*IsInitializer=*/false);
4578                 });
4579           }
4580         } else {
4581           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4582           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4583             return SharedRefLValue.getAddress();
4584           });
4585           (void)InitScope.Privatize();
4586           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4587           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4588                              /*capturedByInit=*/false);
4589         }
4590       } else {
4591         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4592       }
4593     }
4594     ++FI;
4595   }
4596 }
4597 
4598 /// Check if duplication function is required for taskloops.
4599 static bool checkInitIsRequired(CodeGenFunction &CGF,
4600                                 ArrayRef<PrivateDataTy> Privates) {
4601   bool InitRequired = false;
4602   for (const PrivateDataTy &Pair : Privates) {
4603     const VarDecl *VD = Pair.second.PrivateCopy;
4604     const Expr *Init = VD->getAnyInitializer();
4605     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4606                                     !CGF.isTrivialInitializer(Init));
4607     if (InitRequired)
4608       break;
4609   }
4610   return InitRequired;
4611 }
4612 
4613 
4614 /// Emit task_dup function (for initialization of
4615 /// private/firstprivate/lastprivate vars and last_iter flag)
4616 /// \code
4617 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4618 /// lastpriv) {
4619 /// // setup lastprivate flag
4620 ///    task_dst->last = lastpriv;
4621 /// // could be constructor calls here...
4622 /// }
4623 /// \endcode
4624 static llvm::Value *
4625 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4626                     const OMPExecutableDirective &D,
4627                     QualType KmpTaskTWithPrivatesPtrQTy,
4628                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4629                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4630                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4631                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4632   ASTContext &C = CGM.getContext();
4633   FunctionArgList Args;
4634   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4635                            KmpTaskTWithPrivatesPtrQTy,
4636                            ImplicitParamDecl::Other);
4637   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4638                            KmpTaskTWithPrivatesPtrQTy,
4639                            ImplicitParamDecl::Other);
4640   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4641                                 ImplicitParamDecl::Other);
4642   Args.push_back(&DstArg);
4643   Args.push_back(&SrcArg);
4644   Args.push_back(&LastprivArg);
4645   const auto &TaskDupFnInfo =
4646       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4647   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4648   auto *TaskDup =
4649       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
4650                              ".omp_task_dup.", &CGM.getModule());
4651   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4652   TaskDup->setDoesNotRecurse();
4653   CodeGenFunction CGF(CGM);
4654   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4655                     Loc);
4656 
4657   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4658       CGF.GetAddrOfLocalVar(&DstArg),
4659       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4660   // task_dst->liter = lastpriv;
4661   if (WithLastIter) {
4662     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4663     LValue Base = CGF.EmitLValueForField(
4664         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4665     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4666     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4667         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4668     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4669   }
4670 
4671   // Emit initial values for private copies (if any).
4672   assert(!Privates.empty());
4673   Address KmpTaskSharedsPtr = Address::invalid();
4674   if (!Data.FirstprivateVars.empty()) {
4675     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4676         CGF.GetAddrOfLocalVar(&SrcArg),
4677         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4678     LValue Base = CGF.EmitLValueForField(
4679         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4680     KmpTaskSharedsPtr = Address(
4681         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4682                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4683                                                   KmpTaskTShareds)),
4684                              Loc),
4685         CGF.getNaturalTypeAlignment(SharedsTy));
4686   }
4687   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4688                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4689   CGF.FinishFunction();
4690   return TaskDup;
4691 }
4692 
4693 /// Checks if destructor function is required to be generated.
4694 /// \return true if cleanups are required, false otherwise.
4695 static bool
4696 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4697   bool NeedsCleanup = false;
4698   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4699   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4700   for (const FieldDecl *FD : PrivateRD->fields()) {
4701     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4702     if (NeedsCleanup)
4703       break;
4704   }
4705   return NeedsCleanup;
4706 }
4707 
4708 CGOpenMPRuntime::TaskResultTy
4709 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4710                               const OMPExecutableDirective &D,
4711                               llvm::Value *TaskFunction, QualType SharedsTy,
4712                               Address Shareds, const OMPTaskDataTy &Data) {
4713   ASTContext &C = CGM.getContext();
4714   llvm::SmallVector<PrivateDataTy, 4> Privates;
4715   // Aggregate privates and sort them by the alignment.
4716   auto I = Data.PrivateCopies.begin();
4717   for (const Expr *E : Data.PrivateVars) {
4718     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4719     Privates.emplace_back(
4720         C.getDeclAlign(VD),
4721         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4722                          /*PrivateElemInit=*/nullptr));
4723     ++I;
4724   }
4725   I = Data.FirstprivateCopies.begin();
4726   auto IElemInitRef = Data.FirstprivateInits.begin();
4727   for (const Expr *E : Data.FirstprivateVars) {
4728     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4729     Privates.emplace_back(
4730         C.getDeclAlign(VD),
4731         PrivateHelpersTy(
4732             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4733             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4734     ++I;
4735     ++IElemInitRef;
4736   }
4737   I = Data.LastprivateCopies.begin();
4738   for (const Expr *E : Data.LastprivateVars) {
4739     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4740     Privates.emplace_back(
4741         C.getDeclAlign(VD),
4742         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4743                          /*PrivateElemInit=*/nullptr));
4744     ++I;
4745   }
4746   std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
4747   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4748   // Build type kmp_routine_entry_t (if not built yet).
4749   emitKmpRoutineEntryT(KmpInt32Ty);
4750   // Build type kmp_task_t (if not built yet).
4751   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4752     if (SavedKmpTaskloopTQTy.isNull()) {
4753       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4754           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4755     }
4756     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4757   } else {
4758     assert((D.getDirectiveKind() == OMPD_task ||
4759             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4760             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4761            "Expected taskloop, task or target directive");
4762     if (SavedKmpTaskTQTy.isNull()) {
4763       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4764           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4765     }
4766     KmpTaskTQTy = SavedKmpTaskTQTy;
4767   }
4768   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4769   // Build particular struct kmp_task_t for the given task.
4770   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4771       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4772   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4773   QualType KmpTaskTWithPrivatesPtrQTy =
4774       C.getPointerType(KmpTaskTWithPrivatesQTy);
4775   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4776   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4777       KmpTaskTWithPrivatesTy->getPointerTo();
4778   llvm::Value *KmpTaskTWithPrivatesTySize =
4779       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4780   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4781 
4782   // Emit initial values for private copies (if any).
4783   llvm::Value *TaskPrivatesMap = nullptr;
4784   llvm::Type *TaskPrivatesMapTy =
4785       std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4786   if (!Privates.empty()) {
4787     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4788     TaskPrivatesMap = emitTaskPrivateMappingFunction(
4789         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4790         FI->getType(), Privates);
4791     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4792         TaskPrivatesMap, TaskPrivatesMapTy);
4793   } else {
4794     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4795         cast<llvm::PointerType>(TaskPrivatesMapTy));
4796   }
4797   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4798   // kmp_task_t *tt);
4799   llvm::Value *TaskEntry = emitProxyTaskFunction(
4800       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4801       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4802       TaskPrivatesMap);
4803 
4804   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4805   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4806   // kmp_routine_entry_t *task_entry);
4807   // Task flags. Format is taken from
4808   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4809   // description of kmp_tasking_flags struct.
4810   enum {
4811     TiedFlag = 0x1,
4812     FinalFlag = 0x2,
4813     DestructorsFlag = 0x8,
4814     PriorityFlag = 0x20
4815   };
4816   unsigned Flags = Data.Tied ? TiedFlag : 0;
4817   bool NeedsCleanup = false;
4818   if (!Privates.empty()) {
4819     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4820     if (NeedsCleanup)
4821       Flags = Flags | DestructorsFlag;
4822   }
4823   if (Data.Priority.getInt())
4824     Flags = Flags | PriorityFlag;
4825   llvm::Value *TaskFlags =
4826       Data.Final.getPointer()
4827           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4828                                      CGF.Builder.getInt32(FinalFlag),
4829                                      CGF.Builder.getInt32(/*C=*/0))
4830           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4831   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4832   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4833   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4834                               getThreadID(CGF, Loc), TaskFlags,
4835                               KmpTaskTWithPrivatesTySize, SharedsSize,
4836                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4837                                   TaskEntry, KmpRoutineEntryPtrTy)};
4838   llvm::Value *NewTask = CGF.EmitRuntimeCall(
4839       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
4840   llvm::Value *NewTaskNewTaskTTy =
4841       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4842           NewTask, KmpTaskTWithPrivatesPtrTy);
4843   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4844                                                KmpTaskTWithPrivatesQTy);
4845   LValue TDBase =
4846       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4847   // Fill the data in the resulting kmp_task_t record.
4848   // Copy shareds if there are any.
4849   Address KmpTaskSharedsPtr = Address::invalid();
4850   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4851     KmpTaskSharedsPtr =
4852         Address(CGF.EmitLoadOfScalar(
4853                     CGF.EmitLValueForField(
4854                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4855                                            KmpTaskTShareds)),
4856                     Loc),
4857                 CGF.getNaturalTypeAlignment(SharedsTy));
4858     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4859     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4860     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4861   }
4862   // Emit initial values for private copies (if any).
4863   TaskResultTy Result;
4864   if (!Privates.empty()) {
4865     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4866                      SharedsTy, SharedsPtrTy, Data, Privates,
4867                      /*ForDup=*/false);
4868     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4869         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4870       Result.TaskDupFn = emitTaskDupFunction(
4871           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4872           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4873           /*WithLastIter=*/!Data.LastprivateVars.empty());
4874     }
4875   }
4876   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4877   enum { Priority = 0, Destructors = 1 };
4878   // Provide pointer to function with destructors for privates.
4879   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4880   const RecordDecl *KmpCmplrdataUD =
4881       (*FI)->getType()->getAsUnionType()->getDecl();
4882   if (NeedsCleanup) {
4883     llvm::Value *DestructorFn = emitDestructorsFunction(
4884         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4885         KmpTaskTWithPrivatesQTy);
4886     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4887     LValue DestructorsLV = CGF.EmitLValueForField(
4888         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4889     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4890                               DestructorFn, KmpRoutineEntryPtrTy),
4891                           DestructorsLV);
4892   }
4893   // Set priority.
4894   if (Data.Priority.getInt()) {
4895     LValue Data2LV = CGF.EmitLValueForField(
4896         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4897     LValue PriorityLV = CGF.EmitLValueForField(
4898         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4899     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4900   }
4901   Result.NewTask = NewTask;
4902   Result.TaskEntry = TaskEntry;
4903   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4904   Result.TDBase = TDBase;
4905   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4906   return Result;
4907 }
4908 
4909 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4910                                    const OMPExecutableDirective &D,
4911                                    llvm::Value *TaskFunction,
4912                                    QualType SharedsTy, Address Shareds,
4913                                    const Expr *IfCond,
4914                                    const OMPTaskDataTy &Data) {
4915   if (!CGF.HaveInsertPoint())
4916     return;
4917 
4918   TaskResultTy Result =
4919       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4920   llvm::Value *NewTask = Result.NewTask;
4921   llvm::Value *TaskEntry = Result.TaskEntry;
4922   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4923   LValue TDBase = Result.TDBase;
4924   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4925   ASTContext &C = CGM.getContext();
4926   // Process list of dependences.
4927   Address DependenciesArray = Address::invalid();
4928   unsigned NumDependencies = Data.Dependences.size();
4929   if (NumDependencies) {
4930     // Dependence kind for RTL.
4931     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
4932     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4933     RecordDecl *KmpDependInfoRD;
4934     QualType FlagsTy =
4935         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4936     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4937     if (KmpDependInfoTy.isNull()) {
4938       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4939       KmpDependInfoRD->startDefinition();
4940       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4941       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4942       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4943       KmpDependInfoRD->completeDefinition();
4944       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4945     } else {
4946       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4947     }
4948     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
4949     // Define type kmp_depend_info[<Dependences.size()>];
4950     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4951         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
4952         ArrayType::Normal, /*IndexTypeQuals=*/0);
4953     // kmp_depend_info[<Dependences.size()>] deps;
4954     DependenciesArray =
4955         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4956     for (unsigned I = 0; I < NumDependencies; ++I) {
4957       const Expr *E = Data.Dependences[I].second;
4958       LValue Addr = CGF.EmitLValue(E);
4959       llvm::Value *Size;
4960       QualType Ty = E->getType();
4961       if (const auto *ASE =
4962               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4963         LValue UpAddrLVal =
4964             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
4965         llvm::Value *UpAddr =
4966             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
4967         llvm::Value *LowIntPtr =
4968             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
4969         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
4970         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4971       } else {
4972         Size = CGF.getTypeSize(Ty);
4973       }
4974       LValue Base = CGF.MakeAddrLValue(
4975           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
4976           KmpDependInfoTy);
4977       // deps[i].base_addr = &<Dependences[i].second>;
4978       LValue BaseAddrLVal = CGF.EmitLValueForField(
4979           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4980       CGF.EmitStoreOfScalar(
4981           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
4982           BaseAddrLVal);
4983       // deps[i].len = sizeof(<Dependences[i].second>);
4984       LValue LenLVal = CGF.EmitLValueForField(
4985           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4986       CGF.EmitStoreOfScalar(Size, LenLVal);
4987       // deps[i].flags = <Dependences[i].first>;
4988       RTLDependenceKindTy DepKind;
4989       switch (Data.Dependences[I].first) {
4990       case OMPC_DEPEND_in:
4991         DepKind = DepIn;
4992         break;
4993       // Out and InOut dependencies must use the same code.
4994       case OMPC_DEPEND_out:
4995       case OMPC_DEPEND_inout:
4996         DepKind = DepInOut;
4997         break;
4998       case OMPC_DEPEND_source:
4999       case OMPC_DEPEND_sink:
5000       case OMPC_DEPEND_unknown:
5001         llvm_unreachable("Unknown task dependence type");
5002       }
5003       LValue FlagsLVal = CGF.EmitLValueForField(
5004           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5005       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5006                             FlagsLVal);
5007     }
5008     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5009         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
5010         CGF.VoidPtrTy);
5011   }
5012 
5013   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5014   // libcall.
5015   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5016   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5017   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5018   // list is not empty
5019   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5020   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5021   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5022   llvm::Value *DepTaskArgs[7];
5023   if (NumDependencies) {
5024     DepTaskArgs[0] = UpLoc;
5025     DepTaskArgs[1] = ThreadID;
5026     DepTaskArgs[2] = NewTask;
5027     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5028     DepTaskArgs[4] = DependenciesArray.getPointer();
5029     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5030     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5031   }
5032   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5033                         &TaskArgs,
5034                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5035     if (!Data.Tied) {
5036       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5037       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5038       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5039     }
5040     if (NumDependencies) {
5041       CGF.EmitRuntimeCall(
5042           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5043     } else {
5044       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5045                           TaskArgs);
5046     }
5047     // Check if parent region is untied and build return for untied task;
5048     if (auto *Region =
5049             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5050       Region->emitUntiedSwitch(CGF);
5051   };
5052 
5053   llvm::Value *DepWaitTaskArgs[6];
5054   if (NumDependencies) {
5055     DepWaitTaskArgs[0] = UpLoc;
5056     DepWaitTaskArgs[1] = ThreadID;
5057     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5058     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5059     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5060     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5061   }
5062   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5063                         NumDependencies, &DepWaitTaskArgs,
5064                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5065     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5066     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5067     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5068     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5069     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5070     // is specified.
5071     if (NumDependencies)
5072       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5073                           DepWaitTaskArgs);
5074     // Call proxy_task_entry(gtid, new_task);
5075     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5076                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5077       Action.Enter(CGF);
5078       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5079       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5080                                                           OutlinedFnArgs);
5081     };
5082 
5083     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5084     // kmp_task_t *new_task);
5085     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5086     // kmp_task_t *new_task);
5087     RegionCodeGenTy RCG(CodeGen);
5088     CommonActionTy Action(
5089         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5090         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5091     RCG.setAction(Action);
5092     RCG(CGF);
5093   };
5094 
5095   if (IfCond) {
5096     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5097   } else {
5098     RegionCodeGenTy ThenRCG(ThenCodeGen);
5099     ThenRCG(CGF);
5100   }
5101 }
5102 
5103 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5104                                        const OMPLoopDirective &D,
5105                                        llvm::Value *TaskFunction,
5106                                        QualType SharedsTy, Address Shareds,
5107                                        const Expr *IfCond,
5108                                        const OMPTaskDataTy &Data) {
5109   if (!CGF.HaveInsertPoint())
5110     return;
5111   TaskResultTy Result =
5112       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5113   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5114   // libcall.
5115   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5116   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5117   // sched, kmp_uint64 grainsize, void *task_dup);
5118   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5119   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5120   llvm::Value *IfVal;
5121   if (IfCond) {
5122     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5123                                       /*isSigned=*/true);
5124   } else {
5125     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5126   }
5127 
5128   LValue LBLVal = CGF.EmitLValueForField(
5129       Result.TDBase,
5130       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5131   const auto *LBVar =
5132       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5133   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5134                        /*IsInitializer=*/true);
5135   LValue UBLVal = CGF.EmitLValueForField(
5136       Result.TDBase,
5137       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5138   const auto *UBVar =
5139       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5140   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5141                        /*IsInitializer=*/true);
5142   LValue StLVal = CGF.EmitLValueForField(
5143       Result.TDBase,
5144       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5145   const auto *StVar =
5146       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5147   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5148                        /*IsInitializer=*/true);
5149   // Store reductions address.
5150   LValue RedLVal = CGF.EmitLValueForField(
5151       Result.TDBase,
5152       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5153   if (Data.Reductions) {
5154     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5155   } else {
5156     CGF.EmitNullInitialization(RedLVal.getAddress(),
5157                                CGF.getContext().VoidPtrTy);
5158   }
5159   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5160   llvm::Value *TaskArgs[] = {
5161       UpLoc,
5162       ThreadID,
5163       Result.NewTask,
5164       IfVal,
5165       LBLVal.getPointer(),
5166       UBLVal.getPointer(),
5167       CGF.EmitLoadOfScalar(StLVal, Loc),
5168       llvm::ConstantInt::getNullValue(
5169           CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
5170       llvm::ConstantInt::getSigned(
5171           CGF.IntTy, Data.Schedule.getPointer()
5172                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5173                          : NoSchedule),
5174       Data.Schedule.getPointer()
5175           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5176                                       /*isSigned=*/false)
5177           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5178       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5179                              Result.TaskDupFn, CGF.VoidPtrTy)
5180                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5181   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5182 }
5183 
5184 /// \brief Emit reduction operation for each element of array (required for
5185 /// array sections) LHS op = RHS.
5186 /// \param Type Type of array.
5187 /// \param LHSVar Variable on the left side of the reduction operation
5188 /// (references element of array in original variable).
5189 /// \param RHSVar Variable on the right side of the reduction operation
5190 /// (references element of array in original variable).
5191 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5192 /// RHSVar.
5193 static void EmitOMPAggregateReduction(
5194     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5195     const VarDecl *RHSVar,
5196     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5197                                   const Expr *, const Expr *)> &RedOpGen,
5198     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5199     const Expr *UpExpr = nullptr) {
5200   // Perform element-by-element initialization.
5201   QualType ElementTy;
5202   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5203   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5204 
5205   // Drill down to the base element type on both arrays.
5206   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5207   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5208 
5209   llvm::Value *RHSBegin = RHSAddr.getPointer();
5210   llvm::Value *LHSBegin = LHSAddr.getPointer();
5211   // Cast from pointer to array type to pointer to single element.
5212   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5213   // The basic structure here is a while-do loop.
5214   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5215   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5216   llvm::Value *IsEmpty =
5217       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5218   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5219 
5220   // Enter the loop body, making that address the current address.
5221   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5222   CGF.EmitBlock(BodyBB);
5223 
5224   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5225 
5226   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5227       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5228   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5229   Address RHSElementCurrent =
5230       Address(RHSElementPHI,
5231               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5232 
5233   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5234       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5235   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5236   Address LHSElementCurrent =
5237       Address(LHSElementPHI,
5238               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5239 
5240   // Emit copy.
5241   CodeGenFunction::OMPPrivateScope Scope(CGF);
5242   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5243   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5244   Scope.Privatize();
5245   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5246   Scope.ForceCleanup();
5247 
5248   // Shift the address forward by one element.
5249   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5250       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5251   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5252       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5253   // Check whether we've reached the end.
5254   llvm::Value *Done =
5255       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5256   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5257   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5258   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5259 
5260   // Done.
5261   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5262 }
5263 
5264 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5265 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5266 /// UDR combiner function.
5267 static void emitReductionCombiner(CodeGenFunction &CGF,
5268                                   const Expr *ReductionOp) {
5269   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5270     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5271       if (const auto *DRE =
5272               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5273         if (const auto *DRD =
5274                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5275           std::pair<llvm::Function *, llvm::Function *> Reduction =
5276               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5277           RValue Func = RValue::get(Reduction.first);
5278           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5279           CGF.EmitIgnoredExpr(ReductionOp);
5280           return;
5281         }
5282   CGF.EmitIgnoredExpr(ReductionOp);
5283 }
5284 
5285 llvm::Value *CGOpenMPRuntime::emitReductionFunction(
5286     CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
5287     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5288     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5289   ASTContext &C = CGM.getContext();
5290 
5291   // void reduction_func(void *LHSArg, void *RHSArg);
5292   FunctionArgList Args;
5293   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5294                            ImplicitParamDecl::Other);
5295   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5296                            ImplicitParamDecl::Other);
5297   Args.push_back(&LHSArg);
5298   Args.push_back(&RHSArg);
5299   const auto &CGFI =
5300       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5301   auto *Fn = llvm::Function::Create(
5302       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
5303       ".omp.reduction.reduction_func", &CGM.getModule());
5304   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5305   Fn->setDoesNotRecurse();
5306   CodeGenFunction CGF(CGM);
5307   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5308 
5309   // Dst = (void*[n])(LHSArg);
5310   // Src = (void*[n])(RHSArg);
5311   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5312       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5313       ArgsType), CGF.getPointerAlign());
5314   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5315       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5316       ArgsType), CGF.getPointerAlign());
5317 
5318   //  ...
5319   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5320   //  ...
5321   CodeGenFunction::OMPPrivateScope Scope(CGF);
5322   auto IPriv = Privates.begin();
5323   unsigned Idx = 0;
5324   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5325     const auto *RHSVar =
5326         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5327     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5328       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5329     });
5330     const auto *LHSVar =
5331         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5332     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5333       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5334     });
5335     QualType PrivTy = (*IPriv)->getType();
5336     if (PrivTy->isVariablyModifiedType()) {
5337       // Get array size and emit VLA type.
5338       ++Idx;
5339       Address Elem =
5340           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
5341       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5342       const VariableArrayType *VLA =
5343           CGF.getContext().getAsVariableArrayType(PrivTy);
5344       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5345       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5346           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5347       CGF.EmitVariablyModifiedType(PrivTy);
5348     }
5349   }
5350   Scope.Privatize();
5351   IPriv = Privates.begin();
5352   auto ILHS = LHSExprs.begin();
5353   auto IRHS = RHSExprs.begin();
5354   for (const Expr *E : ReductionOps) {
5355     if ((*IPriv)->getType()->isArrayType()) {
5356       // Emit reduction for array section.
5357       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5358       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5359       EmitOMPAggregateReduction(
5360           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5361           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5362             emitReductionCombiner(CGF, E);
5363           });
5364     } else {
5365       // Emit reduction for array subscript or single variable.
5366       emitReductionCombiner(CGF, E);
5367     }
5368     ++IPriv;
5369     ++ILHS;
5370     ++IRHS;
5371   }
5372   Scope.ForceCleanup();
5373   CGF.FinishFunction();
5374   return Fn;
5375 }
5376 
5377 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5378                                                   const Expr *ReductionOp,
5379                                                   const Expr *PrivateRef,
5380                                                   const DeclRefExpr *LHS,
5381                                                   const DeclRefExpr *RHS) {
5382   if (PrivateRef->getType()->isArrayType()) {
5383     // Emit reduction for array section.
5384     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5385     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5386     EmitOMPAggregateReduction(
5387         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5388         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5389           emitReductionCombiner(CGF, ReductionOp);
5390         });
5391   } else {
5392     // Emit reduction for array subscript or single variable.
5393     emitReductionCombiner(CGF, ReductionOp);
5394   }
5395 }
5396 
5397 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5398                                     ArrayRef<const Expr *> Privates,
5399                                     ArrayRef<const Expr *> LHSExprs,
5400                                     ArrayRef<const Expr *> RHSExprs,
5401                                     ArrayRef<const Expr *> ReductionOps,
5402                                     ReductionOptionsTy Options) {
5403   if (!CGF.HaveInsertPoint())
5404     return;
5405 
5406   bool WithNowait = Options.WithNowait;
5407   bool SimpleReduction = Options.SimpleReduction;
5408 
5409   // Next code should be emitted for reduction:
5410   //
5411   // static kmp_critical_name lock = { 0 };
5412   //
5413   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5414   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5415   //  ...
5416   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5417   //  *(Type<n>-1*)rhs[<n>-1]);
5418   // }
5419   //
5420   // ...
5421   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5422   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5423   // RedList, reduce_func, &<lock>)) {
5424   // case 1:
5425   //  ...
5426   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5427   //  ...
5428   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5429   // break;
5430   // case 2:
5431   //  ...
5432   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5433   //  ...
5434   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5435   // break;
5436   // default:;
5437   // }
5438   //
5439   // if SimpleReduction is true, only the next code is generated:
5440   //  ...
5441   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5442   //  ...
5443 
5444   ASTContext &C = CGM.getContext();
5445 
5446   if (SimpleReduction) {
5447     CodeGenFunction::RunCleanupsScope Scope(CGF);
5448     auto IPriv = Privates.begin();
5449     auto ILHS = LHSExprs.begin();
5450     auto IRHS = RHSExprs.begin();
5451     for (const Expr *E : ReductionOps) {
5452       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5453                                   cast<DeclRefExpr>(*IRHS));
5454       ++IPriv;
5455       ++ILHS;
5456       ++IRHS;
5457     }
5458     return;
5459   }
5460 
5461   // 1. Build a list of reduction variables.
5462   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5463   auto Size = RHSExprs.size();
5464   for (const Expr *E : Privates) {
5465     if (E->getType()->isVariablyModifiedType())
5466       // Reserve place for array size.
5467       ++Size;
5468   }
5469   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5470   QualType ReductionArrayTy =
5471       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5472                              /*IndexTypeQuals=*/0);
5473   Address ReductionList =
5474       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5475   auto IPriv = Privates.begin();
5476   unsigned Idx = 0;
5477   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5478     Address Elem =
5479       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
5480     CGF.Builder.CreateStore(
5481         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5482             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5483         Elem);
5484     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5485       // Store array size.
5486       ++Idx;
5487       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
5488                                              CGF.getPointerSize());
5489       llvm::Value *Size = CGF.Builder.CreateIntCast(
5490           CGF.getVLASize(
5491                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5492               .NumElts,
5493           CGF.SizeTy, /*isSigned=*/false);
5494       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5495                               Elem);
5496     }
5497   }
5498 
5499   // 2. Emit reduce_func().
5500   llvm::Value *ReductionFn = emitReductionFunction(
5501       CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
5502       Privates, LHSExprs, RHSExprs, ReductionOps);
5503 
5504   // 3. Create static kmp_critical_name lock = { 0 };
5505   llvm::Value *Lock = getCriticalRegionLock(".reduction");
5506 
5507   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5508   // RedList, reduce_func, &<lock>);
5509   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5510   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5511   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5512   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5513       ReductionList.getPointer(), CGF.VoidPtrTy);
5514   llvm::Value *Args[] = {
5515       IdentTLoc,                             // ident_t *<loc>
5516       ThreadId,                              // i32 <gtid>
5517       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5518       ReductionArrayTySize,                  // size_type sizeof(RedList)
5519       RL,                                    // void *RedList
5520       ReductionFn, // void (*) (void *, void *) <reduce_func>
5521       Lock         // kmp_critical_name *&<lock>
5522   };
5523   llvm::Value *Res = CGF.EmitRuntimeCall(
5524       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5525                                        : OMPRTL__kmpc_reduce),
5526       Args);
5527 
5528   // 5. Build switch(res)
5529   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5530   llvm::SwitchInst *SwInst =
5531       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5532 
5533   // 6. Build case 1:
5534   //  ...
5535   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5536   //  ...
5537   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5538   // break;
5539   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5540   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5541   CGF.EmitBlock(Case1BB);
5542 
5543   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5544   llvm::Value *EndArgs[] = {
5545       IdentTLoc, // ident_t *<loc>
5546       ThreadId,  // i32 <gtid>
5547       Lock       // kmp_critical_name *&<lock>
5548   };
5549   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5550                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5551     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5552     auto IPriv = Privates.begin();
5553     auto ILHS = LHSExprs.begin();
5554     auto IRHS = RHSExprs.begin();
5555     for (const Expr *E : ReductionOps) {
5556       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5557                                      cast<DeclRefExpr>(*IRHS));
5558       ++IPriv;
5559       ++ILHS;
5560       ++IRHS;
5561     }
5562   };
5563   RegionCodeGenTy RCG(CodeGen);
5564   CommonActionTy Action(
5565       nullptr, llvm::None,
5566       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5567                                        : OMPRTL__kmpc_end_reduce),
5568       EndArgs);
5569   RCG.setAction(Action);
5570   RCG(CGF);
5571 
5572   CGF.EmitBranch(DefaultBB);
5573 
5574   // 7. Build case 2:
5575   //  ...
5576   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5577   //  ...
5578   // break;
5579   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5580   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5581   CGF.EmitBlock(Case2BB);
5582 
5583   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5584                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5585     auto ILHS = LHSExprs.begin();
5586     auto IRHS = RHSExprs.begin();
5587     auto IPriv = Privates.begin();
5588     for (const Expr *E : ReductionOps) {
5589       const Expr *XExpr = nullptr;
5590       const Expr *EExpr = nullptr;
5591       const Expr *UpExpr = nullptr;
5592       BinaryOperatorKind BO = BO_Comma;
5593       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5594         if (BO->getOpcode() == BO_Assign) {
5595           XExpr = BO->getLHS();
5596           UpExpr = BO->getRHS();
5597         }
5598       }
5599       // Try to emit update expression as a simple atomic.
5600       const Expr *RHSExpr = UpExpr;
5601       if (RHSExpr) {
5602         // Analyze RHS part of the whole expression.
5603         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5604                 RHSExpr->IgnoreParenImpCasts())) {
5605           // If this is a conditional operator, analyze its condition for
5606           // min/max reduction operator.
5607           RHSExpr = ACO->getCond();
5608         }
5609         if (const auto *BORHS =
5610                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5611           EExpr = BORHS->getRHS();
5612           BO = BORHS->getOpcode();
5613         }
5614       }
5615       if (XExpr) {
5616         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5617         auto &&AtomicRedGen = [BO, VD,
5618                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5619                                     const Expr *EExpr, const Expr *UpExpr) {
5620           LValue X = CGF.EmitLValue(XExpr);
5621           RValue E;
5622           if (EExpr)
5623             E = CGF.EmitAnyExpr(EExpr);
5624           CGF.EmitOMPAtomicSimpleUpdateExpr(
5625               X, E, BO, /*IsXLHSInRHSPart=*/true,
5626               llvm::AtomicOrdering::Monotonic, Loc,
5627               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5628                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5629                 PrivateScope.addPrivate(
5630                     VD, [&CGF, VD, XRValue, Loc]() {
5631                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5632                       CGF.emitOMPSimpleStore(
5633                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5634                           VD->getType().getNonReferenceType(), Loc);
5635                       return LHSTemp;
5636                     });
5637                 (void)PrivateScope.Privatize();
5638                 return CGF.EmitAnyExpr(UpExpr);
5639               });
5640         };
5641         if ((*IPriv)->getType()->isArrayType()) {
5642           // Emit atomic reduction for array section.
5643           const auto *RHSVar =
5644               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5645           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5646                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5647         } else {
5648           // Emit atomic reduction for array subscript or single variable.
5649           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5650         }
5651       } else {
5652         // Emit as a critical region.
5653         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5654                                      const Expr *, const Expr *) {
5655           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5656           RT.emitCriticalRegion(
5657               CGF, ".atomic_reduction",
5658               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5659                 Action.Enter(CGF);
5660                 emitReductionCombiner(CGF, E);
5661               },
5662               Loc);
5663         };
5664         if ((*IPriv)->getType()->isArrayType()) {
5665           const auto *LHSVar =
5666               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5667           const auto *RHSVar =
5668               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5669           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5670                                     CritRedGen);
5671         } else {
5672           CritRedGen(CGF, nullptr, nullptr, nullptr);
5673         }
5674       }
5675       ++ILHS;
5676       ++IRHS;
5677       ++IPriv;
5678     }
5679   };
5680   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5681   if (!WithNowait) {
5682     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5683     llvm::Value *EndArgs[] = {
5684         IdentTLoc, // ident_t *<loc>
5685         ThreadId,  // i32 <gtid>
5686         Lock       // kmp_critical_name *&<lock>
5687     };
5688     CommonActionTy Action(nullptr, llvm::None,
5689                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5690                           EndArgs);
5691     AtomicRCG.setAction(Action);
5692     AtomicRCG(CGF);
5693   } else {
5694     AtomicRCG(CGF);
5695   }
5696 
5697   CGF.EmitBranch(DefaultBB);
5698   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5699 }
5700 
5701 /// Generates unique name for artificial threadprivate variables.
5702 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5703 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5704                                       const Expr *Ref) {
5705   SmallString<256> Buffer;
5706   llvm::raw_svector_ostream Out(Buffer);
5707   const clang::DeclRefExpr *DE;
5708   const VarDecl *D = ::getBaseDecl(Ref, DE);
5709   if (!D)
5710     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5711   D = D->getCanonicalDecl();
5712   Out << Prefix << "."
5713       << (D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D))
5714       << "_" << D->getCanonicalDecl()->getLocStart().getRawEncoding();
5715   return Out.str();
5716 }
5717 
5718 /// Emits reduction initializer function:
5719 /// \code
5720 /// void @.red_init(void* %arg) {
5721 /// %0 = bitcast void* %arg to <type>*
5722 /// store <type> <init>, <type>* %0
5723 /// ret void
5724 /// }
5725 /// \endcode
5726 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5727                                            SourceLocation Loc,
5728                                            ReductionCodeGen &RCG, unsigned N) {
5729   ASTContext &C = CGM.getContext();
5730   FunctionArgList Args;
5731   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5732                           ImplicitParamDecl::Other);
5733   Args.emplace_back(&Param);
5734   const auto &FnInfo =
5735       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5736   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5737   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5738                                     ".red_init.", &CGM.getModule());
5739   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5740   Fn->setDoesNotRecurse();
5741   CodeGenFunction CGF(CGM);
5742   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5743   Address PrivateAddr = CGF.EmitLoadOfPointer(
5744       CGF.GetAddrOfLocalVar(&Param),
5745       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5746   llvm::Value *Size = nullptr;
5747   // If the size of the reduction item is non-constant, load it from global
5748   // threadprivate variable.
5749   if (RCG.getSizes(N).second) {
5750     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5751         CGF, CGM.getContext().getSizeType(),
5752         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5753     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5754                                 CGM.getContext().getSizeType(), Loc);
5755   }
5756   RCG.emitAggregateType(CGF, N, Size);
5757   LValue SharedLVal;
5758   // If initializer uses initializer from declare reduction construct, emit a
5759   // pointer to the address of the original reduction item (reuired by reduction
5760   // initializer)
5761   if (RCG.usesReductionInitializer(N)) {
5762     Address SharedAddr =
5763         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5764             CGF, CGM.getContext().VoidPtrTy,
5765             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
5766     SharedAddr = CGF.EmitLoadOfPointer(
5767         SharedAddr,
5768         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5769     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5770   } else {
5771     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
5772         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5773         CGM.getContext().VoidPtrTy);
5774   }
5775   // Emit the initializer:
5776   // %0 = bitcast void* %arg to <type>*
5777   // store <type> <init>, <type>* %0
5778   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
5779                          [](CodeGenFunction &) { return false; });
5780   CGF.FinishFunction();
5781   return Fn;
5782 }
5783 
5784 /// Emits reduction combiner function:
5785 /// \code
5786 /// void @.red_comb(void* %arg0, void* %arg1) {
5787 /// %lhs = bitcast void* %arg0 to <type>*
5788 /// %rhs = bitcast void* %arg1 to <type>*
5789 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5790 /// store <type> %2, <type>* %lhs
5791 /// ret void
5792 /// }
5793 /// \endcode
5794 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5795                                            SourceLocation Loc,
5796                                            ReductionCodeGen &RCG, unsigned N,
5797                                            const Expr *ReductionOp,
5798                                            const Expr *LHS, const Expr *RHS,
5799                                            const Expr *PrivateRef) {
5800   ASTContext &C = CGM.getContext();
5801   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5802   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5803   FunctionArgList Args;
5804   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5805                                C.VoidPtrTy, ImplicitParamDecl::Other);
5806   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5807                             ImplicitParamDecl::Other);
5808   Args.emplace_back(&ParamInOut);
5809   Args.emplace_back(&ParamIn);
5810   const auto &FnInfo =
5811       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5812   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5813   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5814                                     ".red_comb.", &CGM.getModule());
5815   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5816   Fn->setDoesNotRecurse();
5817   CodeGenFunction CGF(CGM);
5818   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5819   llvm::Value *Size = nullptr;
5820   // If the size of the reduction item is non-constant, load it from global
5821   // threadprivate variable.
5822   if (RCG.getSizes(N).second) {
5823     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5824         CGF, CGM.getContext().getSizeType(),
5825         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5826     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5827                                 CGM.getContext().getSizeType(), Loc);
5828   }
5829   RCG.emitAggregateType(CGF, N, Size);
5830   // Remap lhs and rhs variables to the addresses of the function arguments.
5831   // %lhs = bitcast void* %arg0 to <type>*
5832   // %rhs = bitcast void* %arg1 to <type>*
5833   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5834   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5835     // Pull out the pointer to the variable.
5836     Address PtrAddr = CGF.EmitLoadOfPointer(
5837         CGF.GetAddrOfLocalVar(&ParamInOut),
5838         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5839     return CGF.Builder.CreateElementBitCast(
5840         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5841   });
5842   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5843     // Pull out the pointer to the variable.
5844     Address PtrAddr = CGF.EmitLoadOfPointer(
5845         CGF.GetAddrOfLocalVar(&ParamIn),
5846         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5847     return CGF.Builder.CreateElementBitCast(
5848         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5849   });
5850   PrivateScope.Privatize();
5851   // Emit the combiner body:
5852   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5853   // store <type> %2, <type>* %lhs
5854   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5855       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5856       cast<DeclRefExpr>(RHS));
5857   CGF.FinishFunction();
5858   return Fn;
5859 }
5860 
5861 /// Emits reduction finalizer function:
5862 /// \code
5863 /// void @.red_fini(void* %arg) {
5864 /// %0 = bitcast void* %arg to <type>*
5865 /// <destroy>(<type>* %0)
5866 /// ret void
5867 /// }
5868 /// \endcode
5869 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5870                                            SourceLocation Loc,
5871                                            ReductionCodeGen &RCG, unsigned N) {
5872   if (!RCG.needCleanups(N))
5873     return nullptr;
5874   ASTContext &C = CGM.getContext();
5875   FunctionArgList Args;
5876   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5877                           ImplicitParamDecl::Other);
5878   Args.emplace_back(&Param);
5879   const auto &FnInfo =
5880       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5881   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5882   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5883                                     ".red_fini.", &CGM.getModule());
5884   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5885   Fn->setDoesNotRecurse();
5886   CodeGenFunction CGF(CGM);
5887   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5888   Address PrivateAddr = CGF.EmitLoadOfPointer(
5889       CGF.GetAddrOfLocalVar(&Param),
5890       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5891   llvm::Value *Size = nullptr;
5892   // If the size of the reduction item is non-constant, load it from global
5893   // threadprivate variable.
5894   if (RCG.getSizes(N).second) {
5895     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5896         CGF, CGM.getContext().getSizeType(),
5897         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5898     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5899                                 CGM.getContext().getSizeType(), Loc);
5900   }
5901   RCG.emitAggregateType(CGF, N, Size);
5902   // Emit the finalizer body:
5903   // <destroy>(<type>* %0)
5904   RCG.emitCleanups(CGF, N, PrivateAddr);
5905   CGF.FinishFunction();
5906   return Fn;
5907 }
5908 
5909 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5910     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5911     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5912   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5913     return nullptr;
5914 
5915   // Build typedef struct:
5916   // kmp_task_red_input {
5917   //   void *reduce_shar; // shared reduction item
5918   //   size_t reduce_size; // size of data item
5919   //   void *reduce_init; // data initialization routine
5920   //   void *reduce_fini; // data finalization routine
5921   //   void *reduce_comb; // data combiner routine
5922   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5923   // } kmp_task_red_input_t;
5924   ASTContext &C = CGM.getContext();
5925   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
5926   RD->startDefinition();
5927   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5928   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5929   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5930   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5931   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5932   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5933       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5934   RD->completeDefinition();
5935   QualType RDType = C.getRecordType(RD);
5936   unsigned Size = Data.ReductionVars.size();
5937   llvm::APInt ArraySize(/*numBits=*/64, Size);
5938   QualType ArrayRDType = C.getConstantArrayType(
5939       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
5940   // kmp_task_red_input_t .rd_input.[Size];
5941   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5942   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
5943                        Data.ReductionOps);
5944   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5945     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5946     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5947                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5948     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5949         TaskRedInput.getPointer(), Idxs,
5950         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5951         ".rd_input.gep.");
5952     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5953     // ElemLVal.reduce_shar = &Shareds[Cnt];
5954     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5955     RCG.emitSharedLValue(CGF, Cnt);
5956     llvm::Value *CastedShared =
5957         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
5958     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5959     RCG.emitAggregateType(CGF, Cnt);
5960     llvm::Value *SizeValInChars;
5961     llvm::Value *SizeVal;
5962     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5963     // We use delayed creation/initialization for VLAs, array sections and
5964     // custom reduction initializations. It is required because runtime does not
5965     // provide the way to pass the sizes of VLAs/array sections to
5966     // initializer/combiner/finalizer functions and does not pass the pointer to
5967     // original reduction item to the initializer. Instead threadprivate global
5968     // variables are used to store these values and use them in the functions.
5969     bool DelayedCreation = !!SizeVal;
5970     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5971                                                /*isSigned=*/false);
5972     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5973     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5974     // ElemLVal.reduce_init = init;
5975     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5976     llvm::Value *InitAddr =
5977         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5978     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5979     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
5980     // ElemLVal.reduce_fini = fini;
5981     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5982     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5983     llvm::Value *FiniAddr = Fini
5984                                 ? CGF.EmitCastToVoidPtr(Fini)
5985                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5986     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5987     // ElemLVal.reduce_comb = comb;
5988     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5989     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5990         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5991         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5992     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5993     // ElemLVal.flags = 0;
5994     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5995     if (DelayedCreation) {
5996       CGF.EmitStoreOfScalar(
5997           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
5998           FlagsLVal);
5999     } else
6000       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6001   }
6002   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6003   // *data);
6004   llvm::Value *Args[] = {
6005       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6006                                 /*isSigned=*/true),
6007       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6008       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6009                                                       CGM.VoidPtrTy)};
6010   return CGF.EmitRuntimeCall(
6011       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6012 }
6013 
6014 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6015                                               SourceLocation Loc,
6016                                               ReductionCodeGen &RCG,
6017                                               unsigned N) {
6018   auto Sizes = RCG.getSizes(N);
6019   // Emit threadprivate global variable if the type is non-constant
6020   // (Sizes.second = nullptr).
6021   if (Sizes.second) {
6022     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6023                                                      /*isSigned=*/false);
6024     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6025         CGF, CGM.getContext().getSizeType(),
6026         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6027     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6028   }
6029   // Store address of the original reduction item if custom initializer is used.
6030   if (RCG.usesReductionInitializer(N)) {
6031     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6032         CGF, CGM.getContext().VoidPtrTy,
6033         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6034     CGF.Builder.CreateStore(
6035         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6036             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6037         SharedAddr, /*IsVolatile=*/false);
6038   }
6039 }
6040 
6041 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6042                                               SourceLocation Loc,
6043                                               llvm::Value *ReductionsPtr,
6044                                               LValue SharedLVal) {
6045   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6046   // *d);
6047   llvm::Value *Args[] = {
6048       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6049                                 /*isSigned=*/true),
6050       ReductionsPtr,
6051       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6052                                                       CGM.VoidPtrTy)};
6053   return Address(
6054       CGF.EmitRuntimeCall(
6055           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6056       SharedLVal.getAlignment());
6057 }
6058 
6059 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6060                                        SourceLocation Loc) {
6061   if (!CGF.HaveInsertPoint())
6062     return;
6063   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6064   // global_tid);
6065   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6066   // Ignore return result until untied tasks are supported.
6067   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6068   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6069     Region->emitUntiedSwitch(CGF);
6070 }
6071 
6072 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6073                                            OpenMPDirectiveKind InnerKind,
6074                                            const RegionCodeGenTy &CodeGen,
6075                                            bool HasCancel) {
6076   if (!CGF.HaveInsertPoint())
6077     return;
6078   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6079   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6080 }
6081 
namespace {
/// Cancellation kinds produced by getCancellationKind(). The numeric value is
/// emitted as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls, so the explicit values must not be
/// changed casually.
enum RTCancelKind {
  // Only used as an initial value inside getCancellationKind().
  CancelNoreq = 0,
  // Selected for OMPD_parallel.
  CancelParallel = 1,
  // Selected for OMPD_for.
  CancelLoop = 2,
  // Selected for OMPD_sections.
  CancelSections = 3,
  // Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6091 
6092 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6093   RTCancelKind CancelKind = CancelNoreq;
6094   if (CancelRegion == OMPD_parallel)
6095     CancelKind = CancelParallel;
6096   else if (CancelRegion == OMPD_for)
6097     CancelKind = CancelLoop;
6098   else if (CancelRegion == OMPD_sections)
6099     CancelKind = CancelSections;
6100   else {
6101     assert(CancelRegion == OMPD_taskgroup);
6102     CancelKind = CancelTaskgroup;
6103   }
6104   return CancelKind;
6105 }
6106 
6107 void CGOpenMPRuntime::emitCancellationPointCall(
6108     CodeGenFunction &CGF, SourceLocation Loc,
6109     OpenMPDirectiveKind CancelRegion) {
6110   if (!CGF.HaveInsertPoint())
6111     return;
6112   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6113   // global_tid, kmp_int32 cncl_kind);
6114   if (auto *OMPRegionInfo =
6115           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6116     // For 'cancellation point taskgroup', the task region info may not have a
6117     // cancel. This may instead happen in another adjacent task.
6118     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6119       llvm::Value *Args[] = {
6120           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6121           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6122       // Ignore return result until untied tasks are supported.
6123       llvm::Value *Result = CGF.EmitRuntimeCall(
6124           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6125       // if (__kmpc_cancellationpoint()) {
6126       //   exit from construct;
6127       // }
6128       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6129       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6130       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6131       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6132       CGF.EmitBlock(ExitBB);
6133       // exit from construct;
6134       CodeGenFunction::JumpDest CancelDest =
6135           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6136       CGF.EmitBranchThroughCleanup(CancelDest);
6137       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6138     }
6139   }
6140 }
6141 
6142 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6143                                      const Expr *IfCond,
6144                                      OpenMPDirectiveKind CancelRegion) {
6145   if (!CGF.HaveInsertPoint())
6146     return;
6147   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6148   // kmp_int32 cncl_kind);
6149   if (auto *OMPRegionInfo =
6150           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6151     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6152                                                         PrePostActionTy &) {
6153       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6154       llvm::Value *Args[] = {
6155           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6156           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6157       // Ignore return result until untied tasks are supported.
6158       llvm::Value *Result = CGF.EmitRuntimeCall(
6159           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6160       // if (__kmpc_cancel()) {
6161       //   exit from construct;
6162       // }
6163       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6164       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6165       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6166       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6167       CGF.EmitBlock(ExitBB);
6168       // exit from construct;
6169       CodeGenFunction::JumpDest CancelDest =
6170           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6171       CGF.EmitBranchThroughCleanup(CancelDest);
6172       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6173     };
6174     if (IfCond) {
6175       emitOMPIfClause(CGF, IfCond, ThenGen,
6176                       [](CodeGenFunction &, PrePostActionTy &) {});
6177     } else {
6178       RegionCodeGenTy ThenRCG(ThenGen);
6179       ThenRCG(CGF);
6180     }
6181   }
6182 }
6183 
/// Emit the outlined function for the target directive \p D.
///
/// Asserts that \p ParentName is not empty and then forwards every argument
/// unchanged to emitTargetOutlinedFunctionHelper(), which fills in
/// \p OutlinedFn and \p OutlinedFnID.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6192 
6193 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6194     const OMPExecutableDirective &D, StringRef ParentName,
6195     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6196     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6197   // Create a unique name for the entry function using the source location
6198   // information of the current target region. The name will be something like:
6199   //
6200   // __omp_offloading_DD_FFFF_PP_lBB
6201   //
6202   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6203   // mangled name of the function that encloses the target region and BB is the
6204   // line number of the target region.
6205 
6206   unsigned DeviceID;
6207   unsigned FileID;
6208   unsigned Line;
6209   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
6210                            Line);
6211   SmallString<64> EntryFnName;
6212   {
6213     llvm::raw_svector_ostream OS(EntryFnName);
6214     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6215        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6216   }
6217 
6218   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6219 
6220   CodeGenFunction CGF(CGM, true);
6221   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6222   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6223 
6224   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6225 
6226   // If this target outline function is not an offload entry, we don't need to
6227   // register it.
6228   if (!IsOffloadEntry)
6229     return;
6230 
6231   // The target region ID is used by the runtime library to identify the current
6232   // target region, so it only has to be unique and not necessarily point to
6233   // anything. It could be the pointer to the outlined function that implements
6234   // the target region, but we aren't using that so that the compiler doesn't
6235   // need to keep that, and could therefore inline the host function if proven
6236   // worthwhile during optimization. In the other hand, if emitting code for the
6237   // device, the ID has to be the function address so that it can retrieved from
6238   // the offloading entry and launched by the runtime library. We also mark the
6239   // outlined function to have external linkage in case we are emitting code for
6240   // the device, because these functions will be entry points to the device.
6241 
6242   if (CGM.getLangOpts().OpenMPIsDevice) {
6243     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6244     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
6245     OutlinedFn->setDSOLocal(false);
6246   } else {
6247     OutlinedFnID = new llvm::GlobalVariable(
6248         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6249         llvm::GlobalValue::PrivateLinkage,
6250         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
6251   }
6252 
6253   // Register the information for the entry associated with this target region.
6254   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6255       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6256       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6257 }
6258 
6259 /// discard all CompoundStmts intervening between two constructs
6260 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
6261   while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
6262     Body = CS->body_front();
6263 
6264   return Body;
6265 }
6266 
6267 /// Emit the number of teams for a target directive.  Inspect the num_teams
6268 /// clause associated with a teams construct combined or closely nested
6269 /// with the target directive.
6270 ///
6271 /// Emit a team of size one for directives such as 'target parallel' that
6272 /// have no associated teams construct.
6273 ///
6274 /// Otherwise, return nullptr.
6275 static llvm::Value *
6276 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
6277                                CodeGenFunction &CGF,
6278                                const OMPExecutableDirective &D) {
6279   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
6280                                               "teams directive expected to be "
6281                                               "emitted only for the host!");
6282 
6283   CGBuilderTy &Bld = CGF.Builder;
6284 
6285   // If the target directive is combined with a teams directive:
6286   //   Return the value in the num_teams clause, if any.
6287   //   Otherwise, return 0 to denote the runtime default.
6288   if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
6289     if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
6290       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6291       llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
6292                                                  /*IgnoreResultAssign*/ true);
6293       return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
6294                                /*IsSigned=*/true);
6295     }
6296 
6297     // The default value is 0.
6298     return Bld.getInt32(0);
6299   }
6300 
6301   // If the target directive is combined with a parallel directive but not a
6302   // teams directive, start one team.
6303   if (isOpenMPParallelDirective(D.getDirectiveKind()))
6304     return Bld.getInt32(1);
6305 
6306   // If the current target region has a teams region enclosed, we need to get
6307   // the number of teams to pass to the runtime function call. This is done
6308   // by generating the expression in a inlined region. This is required because
6309   // the expression is captured in the enclosing target environment when the
6310   // teams directive is not combined with target.
6311 
6312   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6313 
6314   if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
6315           ignoreCompoundStmts(CS.getCapturedStmt()))) {
6316     if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
6317       if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
6318         CGOpenMPInnerExprInfo CGInfo(CGF, CS);
6319         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6320         llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
6321         return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
6322                                  /*IsSigned=*/true);
6323       }
6324 
6325       // If we have an enclosed teams directive but no num_teams clause we use
6326       // the default value 0.
6327       return Bld.getInt32(0);
6328     }
6329   }
6330 
6331   // No teams associated with the directive.
6332   return nullptr;
6333 }
6334 
6335 /// Emit the number of threads for a target directive.  Inspect the
6336 /// thread_limit clause associated with a teams construct combined or closely
6337 /// nested with the target directive.
6338 ///
6339 /// Emit the num_threads clause for directives such as 'target parallel' that
6340 /// have no associated teams construct.
6341 ///
6342 /// Otherwise, return nullptr.
6343 static llvm::Value *
6344 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
6345                                  CodeGenFunction &CGF,
6346                                  const OMPExecutableDirective &D) {
6347   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
6348                                               "teams directive expected to be "
6349                                               "emitted only for the host!");
6350 
6351   CGBuilderTy &Bld = CGF.Builder;
6352 
6353   //
6354   // If the target directive is combined with a teams directive:
6355   //   Return the value in the thread_limit clause, if any.
6356   //
6357   // If the target directive is combined with a parallel directive:
6358   //   Return the value in the num_threads clause, if any.
6359   //
6360   // If both clauses are set, select the minimum of the two.
6361   //
6362   // If neither teams or parallel combined directives set the number of threads
6363   // in a team, return 0 to denote the runtime default.
6364   //
6365   // If this is not a teams directive return nullptr.
6366 
6367   if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
6368       isOpenMPParallelDirective(D.getDirectiveKind())) {
6369     llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
6370     llvm::Value *NumThreadsVal = nullptr;
6371     llvm::Value *ThreadLimitVal = nullptr;
6372 
6373     if (const auto *ThreadLimitClause =
6374             D.getSingleClause<OMPThreadLimitClause>()) {
6375       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6376       llvm::Value *ThreadLimit =
6377           CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
6378                              /*IgnoreResultAssign*/ true);
6379       ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6380                                          /*IsSigned=*/true);
6381     }
6382 
6383     if (const auto *NumThreadsClause =
6384             D.getSingleClause<OMPNumThreadsClause>()) {
6385       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6386       llvm::Value *NumThreads =
6387           CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
6388                              /*IgnoreResultAssign*/ true);
6389       NumThreadsVal =
6390           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
6391     }
6392 
6393     // Select the lesser of thread_limit and num_threads.
6394     if (NumThreadsVal)
6395       ThreadLimitVal = ThreadLimitVal
6396                            ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
6397                                                                 ThreadLimitVal),
6398                                               NumThreadsVal, ThreadLimitVal)
6399                            : NumThreadsVal;
6400 
6401     // Set default value passed to the runtime if either teams or a target
6402     // parallel type directive is found but no clause is specified.
6403     if (!ThreadLimitVal)
6404       ThreadLimitVal = DefaultThreadLimitVal;
6405 
6406     return ThreadLimitVal;
6407   }
6408 
6409   // If the current target region has a teams region enclosed, we need to get
6410   // the thread limit to pass to the runtime function call. This is done
6411   // by generating the expression in a inlined region. This is required because
6412   // the expression is captured in the enclosing target environment when the
6413   // teams directive is not combined with target.
6414 
6415   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6416 
6417   if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
6418           ignoreCompoundStmts(CS.getCapturedStmt()))) {
6419     if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
6420       if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
6421         CGOpenMPInnerExprInfo CGInfo(CGF, CS);
6422         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6423         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
6424         return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6425                                          /*IsSigned=*/true);
6426       }
6427 
6428       // If we have an enclosed teams directive but no thread_limit clause we
6429       // use the default value 0.
6430       return CGF.Builder.getInt32(0);
6431     }
6432   }
6433 
6434   // No teams associated with the directive.
6435   return nullptr;
6436 }
6437 
6438 namespace {
6439 // \brief Utility to handle information from clauses associated with a given
6440 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6441 // It provides a convenient interface to obtain the information and generate
6442 // code for that information.
6443 class MappableExprsHandler {
6444 public:
  /// \brief Values for the bit flags used to specify the mapping type for
  /// offloading. Each enumerator occupies a distinct bit, so the values can
  /// be combined with bitwise OR.
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// \brief The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// \brief This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// \brief Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// \brief This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// \brief Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// \brief Marks an implicit map.
    OMP_MAP_IMPLICIT = 0x200,
  };
6476 
6477   /// Class that associates information with a base pointer to be passed to the
6478   /// runtime library.
6479   class BasePointerInfo {
6480     /// The base pointer.
6481     llvm::Value *Ptr = nullptr;
6482     /// The base declaration that refers to this device pointer, or null if
6483     /// there is none.
6484     const ValueDecl *DevPtrDecl = nullptr;
6485 
6486   public:
6487     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6488         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6489     llvm::Value *operator*() const { return Ptr; }
6490     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6491     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6492   };
6493 
6494   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
6495   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
6496   typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;
6497 
6498 private:
6499   /// \brief Directive from where the map clauses were extracted.
6500   const OMPExecutableDirective &CurDir;
6501 
6502   /// \brief Function the directive is being generated for.
6503   CodeGenFunction &CGF;
6504 
6505   /// \brief Set of all first private variables in the current directive.
6506   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
6507   /// Set of all reduction variables in the current directive.
6508   llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;
6509 
6510   /// Map between device pointer declarations and their expression components.
6511   /// The key value for declarations in 'this' is null.
6512   llvm::DenseMap<
6513       const ValueDecl *,
6514       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6515       DevPointersMap;
6516 
6517   llvm::Value *getExprTypeSize(const Expr *E) const {
6518     QualType ExprTy = E->getType().getCanonicalType();
6519 
6520     // Reference types are ignored for mapping purposes.
6521     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6522       ExprTy = RefTy->getPointeeType().getCanonicalType();
6523 
6524     // Given that an array section is considered a built-in type, we need to
6525     // do the calculation based on the length of the section instead of relying
6526     // on CGF.getTypeSize(E->getType()).
6527     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6528       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6529                             OAE->getBase()->IgnoreParenImpCasts())
6530                             .getCanonicalType();
6531 
6532       // If there is no length associated with the expression, that means we
6533       // are using the whole length of the base.
6534       if (!OAE->getLength() && OAE->getColonLoc().isValid())
6535         return CGF.getTypeSize(BaseTy);
6536 
6537       llvm::Value *ElemSize;
6538       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6539         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6540       } else {
6541         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6542         assert(ATy && "Expecting array type if not a pointer type.");
6543         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6544       }
6545 
6546       // If we don't have a length at this point, that is because we have an
6547       // array section with a single element.
6548       if (!OAE->getLength())
6549         return ElemSize;
6550 
6551       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6552       LengthVal =
6553           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6554       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6555     }
6556     return CGF.getTypeSize(ExprTy);
6557   }
6558 
6559   /// \brief Return the corresponding bits for a given map clause modifier. Add
6560   /// a flag marking the map as a pointer if requested. Add a flag marking the
6561   /// map as the first one of a series of maps that relate to the same map
6562   /// expression.
6563   uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
6564                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
6565                           bool AddIsTargetParamFlag) const {
6566     uint64_t Bits = 0u;
6567     switch (MapType) {
6568     case OMPC_MAP_alloc:
6569     case OMPC_MAP_release:
6570       // alloc and release is the default behavior in the runtime library,  i.e.
6571       // if we don't pass any bits alloc/release that is what the runtime is
6572       // going to do. Therefore, we don't need to signal anything for these two
6573       // type modifiers.
6574       break;
6575     case OMPC_MAP_to:
6576       Bits = OMP_MAP_TO;
6577       break;
6578     case OMPC_MAP_from:
6579       Bits = OMP_MAP_FROM;
6580       break;
6581     case OMPC_MAP_tofrom:
6582       Bits = OMP_MAP_TO | OMP_MAP_FROM;
6583       break;
6584     case OMPC_MAP_delete:
6585       Bits = OMP_MAP_DELETE;
6586       break;
6587     default:
6588       llvm_unreachable("Unexpected map type!");
6589       break;
6590     }
6591     if (AddPtrFlag)
6592       Bits |= OMP_MAP_PTR_AND_OBJ;
6593     if (AddIsTargetParamFlag)
6594       Bits |= OMP_MAP_TARGET_PARAM;
6595     if (MapTypeModifier == OMPC_MAP_always)
6596       Bits |= OMP_MAP_ALWAYS;
6597     return Bits;
6598   }
6599 
6600   /// \brief Return true if the provided expression is a final array section. A
6601   /// final array section, is one whose length can't be proved to be one.
6602   bool isFinalArraySectionExpression(const Expr *E) const {
6603     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6604 
6605     // It is not an array section and therefore not a unity-size one.
6606     if (!OASE)
6607       return false;
6608 
6609     // An array section with no colon always refer to a single element.
6610     if (OASE->getColonLoc().isInvalid())
6611       return false;
6612 
6613     const Expr *Length = OASE->getLength();
6614 
6615     // If we don't have a length we have to check if the array has size 1
6616     // for this dimension. Also, we should always expect a length if the
6617     // base type is pointer.
6618     if (!Length) {
6619       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6620                              OASE->getBase()->IgnoreParenImpCasts())
6621                              .getCanonicalType();
6622       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6623         return ATy->getSize().getSExtValue() != 1;
6624       // If we don't have a constant dimension length, we have to consider
6625       // the current section as having any size, so it is not necessarily
6626       // unitary. If it happen to be unity size, that's user fault.
6627       return true;
6628     }
6629 
6630     // Check if the length evaluates to 1.
6631     llvm::APSInt ConstLength;
6632     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
6633       return true; // Can have more that size 1.
6634 
6635     return ConstLength.getSExtValue() != 1;
6636   }
6637 
6638   /// \brief Return the adjusted map modifiers if the declaration a capture
6639   /// refers to appears in a first-private clause. This is expected to be used
6640   /// only with directives that start with 'target'.
6641   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
6642                                                unsigned CurrentModifiers) {
6643     assert(Cap.capturesVariable() && "Expected capture by reference only!");
6644 
6645     // A first private variable captured by reference will use only the
6646     // 'private ptr' and 'map to' flag. Return the right flags if the captured
6647     // declaration is known as first-private in this handler.
6648     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
6649       return MappableExprsHandler::OMP_MAP_PRIVATE |
6650              MappableExprsHandler::OMP_MAP_TO;
6651     // Reduction variable  will use only the 'private ptr' and 'map to_from'
6652     // flag.
6653     if (ReductionDecls.count(Cap.getCapturedVar())) {
6654       return MappableExprsHandler::OMP_MAP_TO |
6655              MappableExprsHandler::OMP_MAP_FROM;
6656     }
6657 
6658     // We didn't modify anything.
6659     return CurrentModifiers;
6660   }
6661 
6662 public:
6663   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
6664       : CurDir(Dir), CGF(CGF) {
6665     // Extract firstprivate clause information.
6666     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
6667       for (const Expr *D : C->varlists())
6668         FirstPrivateDecls.insert(
6669             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6670     for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
6671       for (const Expr *D : C->varlists()) {
6672         ReductionDecls.insert(
6673             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6674       }
6675     }
6676     // Extract device pointer clause information.
6677     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
6678       for (const auto &L : C->component_lists())
6679         DevPointersMap[L.first].push_back(L.second);
6680   }
6681 
6682   /// \brief Generate the base pointers, section pointers, sizes and map type
6683   /// bits for the provided map type, map modifier, and expression components.
6684   /// \a IsFirstComponent should be set to true if the provided set of
6685   /// components is the first associated with a capture.
6686   void generateInfoForComponentList(
6687       OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6688       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6689       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
6690       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
6691       bool IsFirstComponentList, bool IsImplicit) const {
6692 
6693     // The following summarizes what has to be generated for each map and the
6694     // types below. The generated information is expressed in this order:
6695     // base pointer, section pointer, size, flags
6696     // (to add to the ones that come from the map type and modifier).
6697     //
6698     // double d;
6699     // int i[100];
6700     // float *p;
6701     //
6702     // struct S1 {
6703     //   int i;
6704     //   float f[50];
6705     // }
6706     // struct S2 {
6707     //   int i;
6708     //   float f[50];
6709     //   S1 s;
6710     //   double *p;
6711     //   struct S2 *ps;
6712     // }
6713     // S2 s;
6714     // S2 *ps;
6715     //
6716     // map(d)
6717     // &d, &d, sizeof(double), noflags
6718     //
6719     // map(i)
6720     // &i, &i, 100*sizeof(int), noflags
6721     //
6722     // map(i[1:23])
6723     // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
6724     //
6725     // map(p)
6726     // &p, &p, sizeof(float*), noflags
6727     //
6728     // map(p[1:24])
6729     // p, &p[1], 24*sizeof(float), noflags
6730     //
6731     // map(s)
6732     // &s, &s, sizeof(S2), noflags
6733     //
6734     // map(s.i)
6735     // &s, &(s.i), sizeof(int), noflags
6736     //
6737     // map(s.s.f)
6738     // &s, &(s.i.f), 50*sizeof(int), noflags
6739     //
6740     // map(s.p)
6741     // &s, &(s.p), sizeof(double*), noflags
6742     //
6743     // map(s.p[:22], s.a s.b)
6744     // &s, &(s.p), sizeof(double*), noflags
6745     // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
6746     //
6747     // map(s.ps)
6748     // &s, &(s.ps), sizeof(S2*), noflags
6749     //
6750     // map(s.ps->s.i)
6751     // &s, &(s.ps), sizeof(S2*), noflags
6752     // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
6753     //
6754     // map(s.ps->ps)
6755     // &s, &(s.ps), sizeof(S2*), noflags
6756     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6757     //
6758     // map(s.ps->ps->ps)
6759     // &s, &(s.ps), sizeof(S2*), noflags
6760     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6761     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
6762     //
6763     // map(s.ps->ps->s.f[:22])
6764     // &s, &(s.ps), sizeof(S2*), noflags
6765     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6766     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
6767     //
6768     // map(ps)
6769     // &ps, &ps, sizeof(S2*), noflags
6770     //
6771     // map(ps->i)
6772     // ps, &(ps->i), sizeof(int), noflags
6773     //
6774     // map(ps->s.f)
6775     // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
6776     //
6777     // map(ps->p)
6778     // ps, &(ps->p), sizeof(double*), noflags
6779     //
6780     // map(ps->p[:22])
6781     // ps, &(ps->p), sizeof(double*), noflags
6782     // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
6783     //
6784     // map(ps->ps)
6785     // ps, &(ps->ps), sizeof(S2*), noflags
6786     //
6787     // map(ps->ps->s.i)
6788     // ps, &(ps->ps), sizeof(S2*), noflags
6789     // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
6790     //
6791     // map(ps->ps->ps)
6792     // ps, &(ps->ps), sizeof(S2*), noflags
6793     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6794     //
6795     // map(ps->ps->ps->ps)
6796     // ps, &(ps->ps), sizeof(S2*), noflags
6797     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6798     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
6799     //
6800     // map(ps->ps->ps->s.f[:22])
6801     // ps, &(ps->ps), sizeof(S2*), noflags
6802     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6803     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
6804 
6805     // Track if the map information being generated is the first for a capture.
6806     bool IsCaptureFirstInfo = IsFirstComponentList;
6807     bool IsLink = false; // Is this variable a "declare target link"?
6808 
6809     // Scan the components from the base to the complete expression.
6810     auto CI = Components.rbegin();
6811     auto CE = Components.rend();
6812     auto I = CI;
6813 
6814     // Track if the map information being generated is the first for a list of
6815     // components.
6816     bool IsExpressionFirstInfo = true;
6817     llvm::Value *BP = nullptr;
6818 
6819     if (const auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
6820       // The base is the 'this' pointer. The content of the pointer is going
6821       // to be the base of the field being mapped.
6822       BP = CGF.EmitScalarExpr(ME->getBase());
6823     } else {
6824       // The base is the reference to the variable.
6825       // BP = &Var.
6826       BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
6827       if (const auto *VD =
6828               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
6829         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
6830             isDeclareTargetDeclaration(VD))
6831           if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
6832             IsLink = true;
6833             BP = CGF.CGM.getOpenMPRuntime()
6834                      .getAddrOfDeclareTargetLink(VD)
6835                      .getPointer();
6836           }
6837       }
6838 
6839       // If the variable is a pointer and is being dereferenced (i.e. is not
6840       // the last component), the base has to be the pointer itself, not its
6841       // reference. References are ignored for mapping purposes.
6842       QualType Ty =
6843           I->getAssociatedDeclaration()->getType().getNonReferenceType();
6844       if (Ty->isAnyPointerType() && std::next(I) != CE) {
6845         LValue PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
6846         BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
6847                                          Ty->castAs<PointerType>())
6848                  .getPointer();
6849 
6850         // We do not need to generate individual map information for the
6851         // pointer, it can be associated with the combined storage.
6852         ++I;
6853       }
6854     }
6855 
6856     uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
6857     for (; I != CE; ++I) {
6858       auto Next = std::next(I);
6859 
6860       // We need to generate the addresses and sizes if this is the last
6861       // component, if the component is a pointer or if it is an array section
6862       // whose length can't be proved to be one. If this is a pointer, it
6863       // becomes the base address for the following components.
6864 
6865       // A final array section, is one whose length can't be proved to be one.
6866       bool IsFinalArraySection =
6867           isFinalArraySectionExpression(I->getAssociatedExpression());
6868 
6869       // Get information on whether the element is a pointer. Have to do a
6870       // special treatment for array sections given that they are built-in
6871       // types.
6872       const auto *OASE =
6873           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
6874       bool IsPointer =
6875           (OASE &&
6876            OMPArraySectionExpr::getBaseOriginalType(OASE)
6877                .getCanonicalType()
6878                ->isAnyPointerType()) ||
6879           I->getAssociatedExpression()->getType()->isAnyPointerType();
6880 
6881       if (Next == CE || IsPointer || IsFinalArraySection) {
6882         // If this is not the last component, we expect the pointer to be
6883         // associated with an array expression or member expression.
6884         assert((Next == CE ||
6885                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
6886                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
6887                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
6888                "Unexpected expression");
6889 
6890         llvm::Value *LB =
6891             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
6892         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
6893 
6894         // If we have a member expression and the current component is a
6895         // reference, we have to map the reference too. Whenever we have a
6896         // reference, the section that reference refers to is going to be a
6897         // load instruction from the storage assigned to the reference.
6898         if (isa<MemberExpr>(I->getAssociatedExpression()) &&
6899             I->getAssociatedDeclaration()->getType()->isReferenceType()) {
6900           auto *LI = cast<llvm::LoadInst>(LB);
6901           llvm::Value *RefAddr = LI->getPointerOperand();
6902 
6903           BasePointers.push_back(BP);
6904           Pointers.push_back(RefAddr);
6905           Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
6906           Types.push_back(DefaultFlags |
6907                           getMapTypeBits(
6908                               /*MapType*/ OMPC_MAP_alloc,
6909                               /*MapTypeModifier=*/OMPC_MAP_unknown,
6910                               !IsExpressionFirstInfo, IsCaptureFirstInfo));
6911           IsExpressionFirstInfo = false;
6912           IsCaptureFirstInfo = false;
6913           // The reference will be the next base address.
6914           BP = RefAddr;
6915         }
6916 
6917         BasePointers.push_back(BP);
6918         Pointers.push_back(LB);
6919         Sizes.push_back(Size);
6920 
6921         // We need to add a pointer flag for each map that comes from the
6922         // same expression except for the first one. We also need to signal
6923         // this map is the first one that relates with the current capture
6924         // (there is a set of entries for each capture).
6925         Types.push_back(DefaultFlags |
6926                         getMapTypeBits(MapType, MapTypeModifier,
6927                                        !IsExpressionFirstInfo || IsLink,
6928                                        IsCaptureFirstInfo && !IsLink));
6929 
6930         // If we have a final array section, we are done with this expression.
6931         if (IsFinalArraySection)
6932           break;
6933 
6934         // The pointer becomes the base for the next element.
6935         if (Next != CE)
6936           BP = LB;
6937 
6938         IsExpressionFirstInfo = false;
6939         IsCaptureFirstInfo = false;
6940       }
6941     }
6942   }
6943 
6944   /// \brief Generate all the base pointers, section pointers, sizes and map
6945   /// types for the extracted mappable expressions. Also, for each item that
6946   /// relates with a device pointer, a pair of the relevant declaration and
6947   /// index where it occurs is appended to the device pointers info array.
6948   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
6949                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
6950                        MapFlagsArrayTy &Types) const {
6951     BasePointers.clear();
6952     Pointers.clear();
6953     Sizes.clear();
6954     Types.clear();
6955 
6956     struct MapInfo {
6957       /// Kind that defines how a device pointer has to be returned.
6958       enum ReturnPointerKind {
6959         // Don't have to return any pointer.
6960         RPK_None,
6961         // Pointer is the base of the declaration.
6962         RPK_Base,
6963         // Pointer is a member of the base declaration - 'this'
6964         RPK_Member,
6965         // Pointer is a reference and a member of the base declaration - 'this'
6966         RPK_MemberReference,
6967       };
6968       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6969       OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6970       OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
6971       ReturnPointerKind ReturnDevicePointer = RPK_None;
6972       bool IsImplicit = false;
6973 
6974       MapInfo() = default;
6975       MapInfo(
6976           OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6977           OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6978           ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
6979           : Components(Components), MapType(MapType),
6980             MapTypeModifier(MapTypeModifier),
6981             ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
6982     };
6983 
6984     // We have to process the component lists that relate with the same
6985     // declaration in a single chunk so that we can generate the map flags
6986     // correctly. Therefore, we organize all lists in a map.
6987     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
6988 
6989     // Helper function to fill the information map for the different supported
6990     // clauses.
6991     auto &&InfoGen = [&Info](
6992         const ValueDecl *D,
6993         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
6994         OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
6995         MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
6996       const ValueDecl *VD =
6997           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
6998       Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
6999                             IsImplicit);
7000     };
7001 
7002     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7003     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7004       for (const auto &L : C->component_lists()) {
7005         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
7006                 MapInfo::RPK_None, C->isImplicit());
7007       }
7008     for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7009       for (const auto &L : C->component_lists()) {
7010         InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
7011                 MapInfo::RPK_None, C->isImplicit());
7012       }
7013     for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7014       for (const auto &L : C->component_lists()) {
7015         InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
7016                 MapInfo::RPK_None, C->isImplicit());
7017       }
7018 
7019     // Look at the use_device_ptr clause information and mark the existing map
7020     // entries as such. If there is no map information for an entry in the
7021     // use_device_ptr list, we create one with map type 'alloc' and zero size
7022     // section. It is the user fault if that was not mapped before.
7023     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7024     for (const auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
7025       for (const auto &L : C->component_lists()) {
7026         assert(!L.second.empty() && "Not expecting empty list of components!");
7027         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7028         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7029         const Expr *IE = L.second.back().getAssociatedExpression();
7030         // If the first component is a member expression, we have to look into
7031         // 'this', which maps to null in the map of map information. Otherwise
7032         // look directly for the information.
7033         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7034 
7035         // We potentially have map information for this declaration already.
7036         // Look for the first set of components that refer to it.
7037         if (It != Info.end()) {
7038           auto CI = std::find_if(
7039               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7040                 return MI.Components.back().getAssociatedDeclaration() == VD;
7041               });
7042           // If we found a map entry, signal that the pointer has to be returned
7043           // and move on to the next declaration.
7044           if (CI != It->second.end()) {
7045             CI->ReturnDevicePointer = isa<MemberExpr>(IE)
7046                                           ? (VD->getType()->isReferenceType()
7047                                                  ? MapInfo::RPK_MemberReference
7048                                                  : MapInfo::RPK_Member)
7049                                           : MapInfo::RPK_Base;
7050             continue;
7051           }
7052         }
7053 
7054         // We didn't find any match in our map information - generate a zero
7055         // size array section.
7056         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7057         llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(IE),
7058                                                       IE->getExprLoc());
7059         BasePointers.push_back({Ptr, VD});
7060         Pointers.push_back(Ptr);
7061         Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
7062         Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
7063       }
7064 
7065     for (const auto &M : Info) {
7066       // We need to know when we generate information for the first component
7067       // associated with a capture, because the mapping flags depend on it.
7068       bool IsFirstComponentList = true;
7069       for (const MapInfo &L : M.second) {
7070         assert(!L.Components.empty() &&
7071                "Not expecting declaration with no component lists.");
7072 
7073         // Remember the current base pointer index.
7074         unsigned CurrentBasePointersIdx = BasePointers.size();
7075         // FIXME: MSVC 2013 seems to require this-> to find the member method.
7076         this->generateInfoForComponentList(
7077             L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
7078             Sizes, Types, IsFirstComponentList, L.IsImplicit);
7079 
7080         // If this entry relates with a device pointer, set the relevant
7081         // declaration and add the 'return pointer' flag.
7082         if (IsFirstComponentList &&
7083             L.ReturnDevicePointer != MapInfo::RPK_None) {
7084           // If the pointer is not the base of the map, we need to skip the
7085           // base. If it is a reference in a member field, we also need to skip
7086           // the map of the reference.
7087           if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
7088             ++CurrentBasePointersIdx;
7089             if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
7090               ++CurrentBasePointersIdx;
7091           }
7092           assert(BasePointers.size() > CurrentBasePointersIdx &&
7093                  "Unexpected number of mapped base pointers.");
7094 
7095           const ValueDecl *RelevantVD =
7096               L.Components.back().getAssociatedDeclaration();
7097           assert(RelevantVD &&
7098                  "No relevant declaration related with device pointer??");
7099 
7100           BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
7101           Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
7102         }
7103         IsFirstComponentList = false;
7104       }
7105     }
7106   }
7107 
7108   /// \brief Generate the base pointers, section pointers, sizes and map types
7109   /// associated to a given capture.
7110   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
7111                               llvm::Value *Arg,
7112                               MapBaseValuesArrayTy &BasePointers,
7113                               MapValuesArrayTy &Pointers,
7114                               MapValuesArrayTy &Sizes,
7115                               MapFlagsArrayTy &Types) const {
7116     assert(!Cap->capturesVariableArrayType() &&
7117            "Not expecting to generate map info for a variable array type!");
7118 
7119     BasePointers.clear();
7120     Pointers.clear();
7121     Sizes.clear();
7122     Types.clear();
7123 
7124     // We need to know when we generating information for the first component
7125     // associated with a capture, because the mapping flags depend on it.
7126     bool IsFirstComponentList = true;
7127 
7128     const ValueDecl *VD =
7129         Cap->capturesThis()
7130             ? nullptr
7131             : Cap->getCapturedVar()->getCanonicalDecl();
7132 
7133     // If this declaration appears in a is_device_ptr clause we just have to
7134     // pass the pointer by value. If it is a reference to a declaration, we just
7135     // pass its value, otherwise, if it is a member expression, we need to map
7136     // 'to' the field.
7137     if (!VD) {
7138       auto It = DevPointersMap.find(VD);
7139       if (It != DevPointersMap.end()) {
7140         for (ArrayRef<OMPClauseMappableExprCommon::MappableComponent> L :
7141              It->second) {
7142           generateInfoForComponentList(
7143               /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
7144               BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
7145               /*IsImplicit=*/false);
7146           IsFirstComponentList = false;
7147         }
7148         return;
7149       }
7150     } else if (DevPointersMap.count(VD)) {
7151       BasePointers.emplace_back(Arg, VD);
7152       Pointers.push_back(Arg);
7153       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
7154       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
7155       return;
7156     }
7157 
7158     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7159     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7160       for (const auto &L : C->decl_component_lists(VD)) {
7161         assert(L.first == VD &&
7162                "We got information for the wrong declaration??");
7163         assert(!L.second.empty() &&
7164                "Not expecting declaration with no component lists.");
7165         generateInfoForComponentList(
7166             C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
7167             Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
7168         IsFirstComponentList = false;
7169       }
7170 
7171     return;
7172   }
7173 
7174   /// \brief Generate the default map information for a given capture \a CI,
7175   /// record field declaration \a RI and captured value \a CV.
7176   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
7177                               const FieldDecl &RI, llvm::Value *CV,
7178                               MapBaseValuesArrayTy &CurBasePointers,
7179                               MapValuesArrayTy &CurPointers,
7180                               MapValuesArrayTy &CurSizes,
7181                               MapFlagsArrayTy &CurMapTypes) {
7182 
7183     // Do the default mapping.
7184     if (CI.capturesThis()) {
7185       CurBasePointers.push_back(CV);
7186       CurPointers.push_back(CV);
7187       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
7188       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
7189       // Default map type.
7190       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
7191     } else if (CI.capturesVariableByCopy()) {
7192       CurBasePointers.push_back(CV);
7193       CurPointers.push_back(CV);
7194       if (!RI.getType()->isAnyPointerType()) {
7195         // We have to signal to the runtime captures passed by value that are
7196         // not pointers.
7197         CurMapTypes.push_back(OMP_MAP_LITERAL);
7198         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
7199       } else {
7200         // Pointers are implicitly mapped with a zero size and no flags
7201         // (other than first map that is added for all implicit maps).
7202         CurMapTypes.push_back(0u);
7203         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
7204       }
7205     } else {
7206       assert(CI.capturesVariable() && "Expected captured reference.");
7207       CurBasePointers.push_back(CV);
7208       CurPointers.push_back(CV);
7209 
7210       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
7211       QualType ElementType = PtrTy->getPointeeType();
7212       CurSizes.push_back(CGF.getTypeSize(ElementType));
7213       // The default map type for a scalar/complex type is 'to' because by
7214       // default the value doesn't have to be retrieved. For an aggregate
7215       // type, the default is 'tofrom'.
7216       CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
7217           CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
7218                                              : OMP_MAP_TO));
7219     }
7220     // Every default map produces a single argument which is a target parameter.
7221     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
7222   }
7223 };
7224 
/// \brief Device ID values that carry a special meaning rather than
/// designating an actual device.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID used when no device was specified. The runtime is
  /// expected to obtain the device from the environment variables defined in
  /// the OpenMP specification.
  OMP_DEVICEID_UNDEF = -1,
};
7230 } // anonymous namespace
7231 
7232 /// \brief Emit the arrays used to pass the captures and map information to the
7233 /// offloading runtime library. If there is no map or capture information,
7234 /// return nullptr by reference.
7235 static void
7236 emitOffloadingArrays(CodeGenFunction &CGF,
7237                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
7238                      MappableExprsHandler::MapValuesArrayTy &Pointers,
7239                      MappableExprsHandler::MapValuesArrayTy &Sizes,
7240                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
7241                      CGOpenMPRuntime::TargetDataInfo &Info) {
7242   CodeGenModule &CGM = CGF.CGM;
7243   ASTContext &Ctx = CGF.getContext();
7244 
7245   // Reset the array information.
7246   Info.clearArrayInfo();
7247   Info.NumberOfPtrs = BasePointers.size();
7248 
7249   if (Info.NumberOfPtrs) {
7250     // Detect if we have any capture size requiring runtime evaluation of the
7251     // size so that a constant array could be eventually used.
7252     bool hasRuntimeEvaluationCaptureSize = false;
7253     for (llvm::Value *S : Sizes)
7254       if (!isa<llvm::Constant>(S)) {
7255         hasRuntimeEvaluationCaptureSize = true;
7256         break;
7257       }
7258 
7259     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
7260     QualType PointerArrayType =
7261         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
7262                                  /*IndexTypeQuals=*/0);
7263 
7264     Info.BasePointersArray =
7265         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
7266     Info.PointersArray =
7267         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
7268 
7269     // If we don't have any VLA types or other types that require runtime
7270     // evaluation, we can use a constant array for the map sizes, otherwise we
7271     // need to fill up the arrays as we do for the pointers.
7272     if (hasRuntimeEvaluationCaptureSize) {
7273       QualType SizeArrayType = Ctx.getConstantArrayType(
7274           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
7275           /*IndexTypeQuals=*/0);
7276       Info.SizesArray =
7277           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
7278     } else {
7279       // We expect all the sizes to be constant, so we collect them to create
7280       // a constant array.
7281       SmallVector<llvm::Constant *, 16> ConstSizes;
7282       for (llvm::Value *S : Sizes)
7283         ConstSizes.push_back(cast<llvm::Constant>(S));
7284 
7285       auto *SizesArrayInit = llvm::ConstantArray::get(
7286           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
7287       auto *SizesArrayGbl = new llvm::GlobalVariable(
7288           CGM.getModule(), SizesArrayInit->getType(),
7289           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
7290           SizesArrayInit, ".offload_sizes");
7291       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
7292       Info.SizesArray = SizesArrayGbl;
7293     }
7294 
7295     // The map types are always constant so we don't need to generate code to
7296     // fill arrays. Instead, we create an array constant.
7297     llvm::Constant *MapTypesArrayInit =
7298         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
7299     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
7300         CGM.getModule(), MapTypesArrayInit->getType(),
7301         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
7302         MapTypesArrayInit, ".offload_maptypes");
7303     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
7304     Info.MapTypesArray = MapTypesArrayGbl;
7305 
7306     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
7307       llvm::Value *BPVal = *BasePointers[I];
7308       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
7309           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7310           Info.BasePointersArray, 0, I);
7311       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7312           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
7313       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
7314       CGF.Builder.CreateStore(BPVal, BPAddr);
7315 
7316       if (Info.requiresDevicePointerInfo())
7317         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
7318           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
7319 
7320       llvm::Value *PVal = Pointers[I];
7321       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
7322           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7323           Info.PointersArray, 0, I);
7324       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7325           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
7326       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
7327       CGF.Builder.CreateStore(PVal, PAddr);
7328 
7329       if (hasRuntimeEvaluationCaptureSize) {
7330         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
7331             llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
7332             Info.SizesArray,
7333             /*Idx0=*/0,
7334             /*Idx1=*/I);
7335         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
7336         CGF.Builder.CreateStore(
7337             CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
7338             SAddr);
7339       }
7340     }
7341   }
7342 }
7343 /// \brief Emit the arguments to be passed to the runtime library based on the
7344 /// arrays of pointers, sizes and map types.
7345 static void emitOffloadingArraysArgument(
7346     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
7347     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
7348     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
7349   CodeGenModule &CGM = CGF.CGM;
7350   if (Info.NumberOfPtrs) {
7351     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7352         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7353         Info.BasePointersArray,
7354         /*Idx0=*/0, /*Idx1=*/0);
7355     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7356         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7357         Info.PointersArray,
7358         /*Idx0=*/0,
7359         /*Idx1=*/0);
7360     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7361         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
7362         /*Idx0=*/0, /*Idx1=*/0);
7363     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7364         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
7365         Info.MapTypesArray,
7366         /*Idx0=*/0,
7367         /*Idx1=*/0);
7368   } else {
7369     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7370     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7371     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
7372     MapTypesArrayArg =
7373         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
7374   }
7375 }
7376 
/// \brief Emit the host-side code that launches the target region of
/// directive \a D.
///
/// \param CGF Function in which the code is emitted; nothing is emitted if it
/// has no insertion point.
/// \param D Target directive being lowered.
/// \param OutlinedFn Host version of the target region; must be non-null.
/// \param OutlinedFnID Identifier of the target region that is passed to the
/// offloading runtime call. When null, only the host version is called.
/// \param IfCond Optional 'if' clause condition. It is consulted only when
/// \a OutlinedFnID is non-null.
/// \param Device Optional 'device' clause expression. When null,
/// OMP_DEVICEID_UNDEF is passed to the runtime instead.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause makes the code below go through
  // EmitOMPTargetTaskBasedDirective instead of an inlined directive.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values of the captured variables into CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);

    // A 'nowait' clause selects the *_nowait variant of the runtime entry
    // point below.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-zero return value branches to the "omp_offload.failed" block.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // With an outer task, the captured variables are regenerated using the
    // CodeGenFunction this lambda is invoked with before calling the host
    // version.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
  };

  // Offloading path: build the mapping arrays for all captures, publish them
  // through InputInfo/MapTypesArray and then run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Scratch arrays holding the entries of the capture currently processed;
    // they are cleared at the start of every loop iteration.
    MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
    MappableExprsHandler::MapValuesArrayTy CurPointers;
    MappableExprsHandler::MapValuesArrayTy CurSizes;
    MappableExprsHandler::MapFlagsArrayTy CurMapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      CurBasePointers.clear();
      CurPointers.clear();
      CurSizes.clear();
      CurMapTypes.clear();

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        // Skip component lists without a declaration or whose declaration is
        // not a variable.
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        // Only variables marked as 'declare target link' are handled here.
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            isDeclareTargetDeclaration(VD);
        if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        MEHandler.generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
            Pointers, Sizes, MapTypes, /*IsFirstComponentList=*/true,
            C->isImplicit());
      }
    }

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments. The output arguments are
    // the fields of Info themselves, so each field is overwritten in place
    // with the corresponding argument value.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, through the task-based directive emission
  // when an outer task is required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      // No offloading arrays are needed here, so a default-constructed
      // OMPTargetDataInfo is passed.
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
7650 
7651 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
7652                                                     StringRef ParentName) {
7653   if (!S)
7654     return;
7655 
7656   // Codegen OMP target directives that offload compute to the device.
7657   bool RequiresDeviceCodegen =
7658       isa<OMPExecutableDirective>(S) &&
7659       isOpenMPTargetExecutionDirective(
7660           cast<OMPExecutableDirective>(S)->getDirectiveKind());
7661 
7662   if (RequiresDeviceCodegen) {
7663     const auto &E = *cast<OMPExecutableDirective>(S);
7664     unsigned DeviceID;
7665     unsigned FileID;
7666     unsigned Line;
7667     getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
7668                              FileID, Line);
7669 
7670     // Is this a target region that should not be emitted as an entry point? If
7671     // so just signal we are done with this target region.
7672     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
7673                                                             ParentName, Line))
7674       return;
7675 
7676     switch (E.getDirectiveKind()) {
7677     case OMPD_target:
7678       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
7679                                                    cast<OMPTargetDirective>(E));
7680       break;
7681     case OMPD_target_parallel:
7682       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7683           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
7684       break;
7685     case OMPD_target_teams:
7686       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7687           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
7688       break;
7689     case OMPD_target_teams_distribute:
7690       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7691           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
7692       break;
7693     case OMPD_target_teams_distribute_simd:
7694       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7695           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
7696       break;
7697     case OMPD_target_parallel_for:
7698       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7699           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
7700       break;
7701     case OMPD_target_parallel_for_simd:
7702       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7703           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
7704       break;
7705     case OMPD_target_simd:
7706       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
7707           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
7708       break;
7709     case OMPD_target_teams_distribute_parallel_for:
7710       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7711           CGM, ParentName,
7712           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
7713       break;
7714     case OMPD_target_teams_distribute_parallel_for_simd:
7715       CodeGenFunction::
7716           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7717               CGM, ParentName,
7718               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
7719       break;
7720     case OMPD_parallel:
7721     case OMPD_for:
7722     case OMPD_parallel_for:
7723     case OMPD_parallel_sections:
7724     case OMPD_for_simd:
7725     case OMPD_parallel_for_simd:
7726     case OMPD_cancel:
7727     case OMPD_cancellation_point:
7728     case OMPD_ordered:
7729     case OMPD_threadprivate:
7730     case OMPD_task:
7731     case OMPD_simd:
7732     case OMPD_sections:
7733     case OMPD_section:
7734     case OMPD_single:
7735     case OMPD_master:
7736     case OMPD_critical:
7737     case OMPD_taskyield:
7738     case OMPD_barrier:
7739     case OMPD_taskwait:
7740     case OMPD_taskgroup:
7741     case OMPD_atomic:
7742     case OMPD_flush:
7743     case OMPD_teams:
7744     case OMPD_target_data:
7745     case OMPD_target_exit_data:
7746     case OMPD_target_enter_data:
7747     case OMPD_distribute:
7748     case OMPD_distribute_simd:
7749     case OMPD_distribute_parallel_for:
7750     case OMPD_distribute_parallel_for_simd:
7751     case OMPD_teams_distribute:
7752     case OMPD_teams_distribute_simd:
7753     case OMPD_teams_distribute_parallel_for:
7754     case OMPD_teams_distribute_parallel_for_simd:
7755     case OMPD_target_update:
7756     case OMPD_declare_simd:
7757     case OMPD_declare_target:
7758     case OMPD_end_declare_target:
7759     case OMPD_declare_reduction:
7760     case OMPD_taskloop:
7761     case OMPD_taskloop_simd:
7762     case OMPD_unknown:
7763       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
7764     }
7765     return;
7766   }
7767 
7768   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
7769     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
7770       return;
7771 
7772     scanForTargetRegionsFunctions(
7773         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
7774     return;
7775   }
7776 
7777   // If this is a lambda function, look into its body.
7778   if (const auto *L = dyn_cast<LambdaExpr>(S))
7779     S = L->getBody();
7780 
7781   // Keep looking for target regions recursively.
7782   for (const Stmt *II : S->children())
7783     scanForTargetRegionsFunctions(II, ParentName);
7784 }
7785 
7786 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
7787   const auto *FD = cast<FunctionDecl>(GD.getDecl());
7788 
7789   // If emitting code for the host, we do not process FD here. Instead we do
7790   // the normal code generation.
7791   if (!CGM.getLangOpts().OpenMPIsDevice)
7792     return false;
7793 
7794   // Try to detect target regions in the function.
7795   scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD));
7796 
7797   // Do not to emit function if it is not marked as declare target.
7798   return !isDeclareTargetDeclaration(FD);
7799 }
7800 
7801 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
7802   if (!CGM.getLangOpts().OpenMPIsDevice)
7803     return false;
7804 
7805   // Check if there are Ctors/Dtors in this declaration and look for target
7806   // regions in it. We use the complete variant to produce the kernel name
7807   // mangling.
7808   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
7809   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
7810     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
7811       StringRef ParentName =
7812           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
7813       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
7814     }
7815     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
7816       StringRef ParentName =
7817           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
7818       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
7819     }
7820   }
7821 
7822   // Do not to emit variable if it is not marked as declare target.
7823   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7824       isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl()));
7825   return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link;
7826 }
7827 
7828 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
7829                                                    llvm::Constant *Addr) {
7830   if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7831           isDeclareTargetDeclaration(VD)) {
7832     OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
7833     StringRef VarName;
7834     CharUnits VarSize;
7835     llvm::GlobalValue::LinkageTypes Linkage;
7836     switch (*Res) {
7837     case OMPDeclareTargetDeclAttr::MT_To:
7838       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
7839       VarName = CGM.getMangledName(VD);
7840       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
7841       Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
7842       break;
7843     case OMPDeclareTargetDeclAttr::MT_Link:
7844       // Map type 'to' because we do not map the original variable but the
7845       // reference.
7846       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
7847       if (!CGM.getLangOpts().OpenMPIsDevice) {
7848         Addr =
7849             cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
7850       }
7851       VarName = Addr->getName();
7852       VarSize = CGM.getPointerSize();
7853       Linkage = llvm::GlobalValue::WeakAnyLinkage;
7854       break;
7855     }
7856     OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
7857         VarName, Addr, VarSize, Flags, Linkage);
7858   }
7859 }
7860 
7861 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
7862   if (isa<FunctionDecl>(GD.getDecl()))
7863     return emitTargetFunctions(GD);
7864 
7865   return emitTargetGlobalVariable(GD);
7866 }
7867 
7868 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
7869     CodeGenModule &CGM)
7870     : CGM(CGM) {
7871   if (CGM.getLangOpts().OpenMPIsDevice) {
7872     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
7873     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
7874   }
7875 }
7876 
7877 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
7878   if (CGM.getLangOpts().OpenMPIsDevice)
7879     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
7880 }
7881 
7882 bool CGOpenMPRuntime::markAsGlobalTarget(const FunctionDecl *D) {
7883   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
7884     return true;
7885 
7886   const FunctionDecl *FD = D->getCanonicalDecl();
7887   // Do not to emit function if it is marked as declare target as it was already
7888   // emitted.
7889   if (isDeclareTargetDeclaration(D)) {
7890     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
7891       if (auto *F = dyn_cast_or_null<llvm::Function>(
7892               CGM.GetGlobalValue(CGM.getMangledName(D))))
7893         return !F->isDeclaration();
7894       return false;
7895     }
7896     return true;
7897   }
7898 
7899   // Do not mark member functions except for static.
7900   if (const auto *Method = dyn_cast<CXXMethodDecl>(FD))
7901     if (!Method->isStatic())
7902       return true;
7903 
7904   return !AlreadyEmittedTargetFunctions.insert(FD).second;
7905 }
7906 
7907 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
7908   // If we have offloading in the current module, we need to emit the entries
7909   // now and register the offloading descriptor.
7910   createOffloadEntriesAndInfoMetadata();
7911 
7912   // Create and register the offloading binary descriptors. This is the main
7913   // entity that captures all the information about offloading in the current
7914   // compilation unit.
7915   return createOffloadingBinaryDescriptorRegistration();
7916 }
7917 
7918 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
7919                                     const OMPExecutableDirective &D,
7920                                     SourceLocation Loc,
7921                                     llvm::Value *OutlinedFn,
7922                                     ArrayRef<llvm::Value *> CapturedVars) {
7923   if (!CGF.HaveInsertPoint())
7924     return;
7925 
7926   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
7927   CodeGenFunction::RunCleanupsScope Scope(CGF);
7928 
7929   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
7930   llvm::Value *Args[] = {
7931       RTLoc,
7932       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
7933       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
7934   llvm::SmallVector<llvm::Value *, 16> RealArgs;
7935   RealArgs.append(std::begin(Args), std::end(Args));
7936   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
7937 
7938   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
7939   CGF.EmitRuntimeCall(RTLFn, RealArgs);
7940 }
7941 
7942 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
7943                                          const Expr *NumTeams,
7944                                          const Expr *ThreadLimit,
7945                                          SourceLocation Loc) {
7946   if (!CGF.HaveInsertPoint())
7947     return;
7948 
7949   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
7950 
7951   llvm::Value *NumTeamsVal =
7952       NumTeams
7953           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
7954                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7955           : CGF.Builder.getInt32(0);
7956 
7957   llvm::Value *ThreadLimitVal =
7958       ThreadLimit
7959           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
7960                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7961           : CGF.Builder.getInt32(0);
7962 
7963   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
7964   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
7965                                      ThreadLimitVal};
7966   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
7967                       PushNumTeamsArgs);
7968 }
7969 
7970 void CGOpenMPRuntime::emitTargetDataCalls(
7971     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7972     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
7973   if (!CGF.HaveInsertPoint())
7974     return;
7975 
7976   // Action used to replace the default codegen action and turn privatization
7977   // off.
7978   PrePostActionTy NoPrivAction;
7979 
7980   // Generate the code for the opening of the data environment. Capture all the
7981   // arguments of the runtime call by reference because they are used in the
7982   // closing of the region.
7983   auto &&BeginThenGen = [this, &D, Device, &Info,
7984                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
7985     // Fill up the arrays with all the mapped variables.
7986     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
7987     MappableExprsHandler::MapValuesArrayTy Pointers;
7988     MappableExprsHandler::MapValuesArrayTy Sizes;
7989     MappableExprsHandler::MapFlagsArrayTy MapTypes;
7990 
7991     // Get map clause information.
7992     MappableExprsHandler MCHandler(D, CGF);
7993     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7994 
7995     // Fill up the arrays and create the arguments.
7996     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7997 
7998     llvm::Value *BasePointersArrayArg = nullptr;
7999     llvm::Value *PointersArrayArg = nullptr;
8000     llvm::Value *SizesArrayArg = nullptr;
8001     llvm::Value *MapTypesArrayArg = nullptr;
8002     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
8003                                  SizesArrayArg, MapTypesArrayArg, Info);
8004 
8005     // Emit device ID if any.
8006     llvm::Value *DeviceID = nullptr;
8007     if (Device) {
8008       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8009                                            CGF.Int64Ty, /*isSigned=*/true);
8010     } else {
8011       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8012     }
8013 
8014     // Emit the number of elements in the offloading arrays.
8015     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
8016 
8017     llvm::Value *OffloadingArgs[] = {
8018         DeviceID,         PointerNum,    BasePointersArrayArg,
8019         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
8020     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
8021                         OffloadingArgs);
8022 
8023     // If device pointer privatization is required, emit the body of the region
8024     // here. It will have to be duplicated: with and without privatization.
8025     if (!Info.CaptureDeviceAddrMap.empty())
8026       CodeGen(CGF);
8027   };
8028 
8029   // Generate code for the closing of the data region.
8030   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
8031                                             PrePostActionTy &) {
8032     assert(Info.isValid() && "Invalid data environment closing arguments.");
8033 
8034     llvm::Value *BasePointersArrayArg = nullptr;
8035     llvm::Value *PointersArrayArg = nullptr;
8036     llvm::Value *SizesArrayArg = nullptr;
8037     llvm::Value *MapTypesArrayArg = nullptr;
8038     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
8039                                  SizesArrayArg, MapTypesArrayArg, Info);
8040 
8041     // Emit device ID if any.
8042     llvm::Value *DeviceID = nullptr;
8043     if (Device) {
8044       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8045                                            CGF.Int64Ty, /*isSigned=*/true);
8046     } else {
8047       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8048     }
8049 
8050     // Emit the number of elements in the offloading arrays.
8051     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
8052 
8053     llvm::Value *OffloadingArgs[] = {
8054         DeviceID,         PointerNum,    BasePointersArrayArg,
8055         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
8056     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
8057                         OffloadingArgs);
8058   };
8059 
8060   // If we need device pointer privatization, we need to emit the body of the
8061   // region with no privatization in the 'else' branch of the conditional.
8062   // Otherwise, we don't have to do anything.
8063   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
8064                                                          PrePostActionTy &) {
8065     if (!Info.CaptureDeviceAddrMap.empty()) {
8066       CodeGen.setAction(NoPrivAction);
8067       CodeGen(CGF);
8068     }
8069   };
8070 
8071   // We don't have to do anything to close the region if the if clause evaluates
8072   // to false.
8073   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
8074 
8075   if (IfCond) {
8076     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
8077   } else {
8078     RegionCodeGenTy RCG(BeginThenGen);
8079     RCG(CGF);
8080   }
8081 
8082   // If we don't require privatization of device pointers, we emit the body in
8083   // between the runtime calls. This avoids duplicating the body code.
8084   if (Info.CaptureDeviceAddrMap.empty()) {
8085     CodeGen.setAction(NoPrivAction);
8086     CodeGen(CGF);
8087   }
8088 
8089   if (IfCond) {
8090     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
8091   } else {
8092     RegionCodeGenTy RCG(EndThenGen);
8093     RCG(CGF);
8094   }
8095 }
8096 
8097 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
8098     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8099     const Expr *Device) {
8100   if (!CGF.HaveInsertPoint())
8101     return;
8102 
8103   assert((isa<OMPTargetEnterDataDirective>(D) ||
8104           isa<OMPTargetExitDataDirective>(D) ||
8105           isa<OMPTargetUpdateDirective>(D)) &&
8106          "Expecting either target enter, exit data, or update directives.");
8107 
8108   CodeGenFunction::OMPTargetDataInfo InputInfo;
8109   llvm::Value *MapTypesArray = nullptr;
8110   // Generate the code for the opening of the data environment.
8111   auto &&ThenGen = [this, &D, Device, &InputInfo,
8112                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
8113     // Emit device ID if any.
8114     llvm::Value *DeviceID = nullptr;
8115     if (Device) {
8116       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8117                                            CGF.Int64Ty, /*isSigned=*/true);
8118     } else {
8119       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8120     }
8121 
8122     // Emit the number of elements in the offloading arrays.
8123     llvm::Constant *PointerNum =
8124         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8125 
8126     llvm::Value *OffloadingArgs[] = {DeviceID,
8127                                      PointerNum,
8128                                      InputInfo.BasePointersArray.getPointer(),
8129                                      InputInfo.PointersArray.getPointer(),
8130                                      InputInfo.SizesArray.getPointer(),
8131                                      MapTypesArray};
8132 
8133     // Select the right runtime function call for each expected standalone
8134     // directive.
8135     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8136     OpenMPRTLFunction RTLFn;
8137     switch (D.getDirectiveKind()) {
8138     case OMPD_target_enter_data:
8139       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
8140                         : OMPRTL__tgt_target_data_begin;
8141       break;
8142     case OMPD_target_exit_data:
8143       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
8144                         : OMPRTL__tgt_target_data_end;
8145       break;
8146     case OMPD_target_update:
8147       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
8148                         : OMPRTL__tgt_target_data_update;
8149       break;
8150     case OMPD_parallel:
8151     case OMPD_for:
8152     case OMPD_parallel_for:
8153     case OMPD_parallel_sections:
8154     case OMPD_for_simd:
8155     case OMPD_parallel_for_simd:
8156     case OMPD_cancel:
8157     case OMPD_cancellation_point:
8158     case OMPD_ordered:
8159     case OMPD_threadprivate:
8160     case OMPD_task:
8161     case OMPD_simd:
8162     case OMPD_sections:
8163     case OMPD_section:
8164     case OMPD_single:
8165     case OMPD_master:
8166     case OMPD_critical:
8167     case OMPD_taskyield:
8168     case OMPD_barrier:
8169     case OMPD_taskwait:
8170     case OMPD_taskgroup:
8171     case OMPD_atomic:
8172     case OMPD_flush:
8173     case OMPD_teams:
8174     case OMPD_target_data:
8175     case OMPD_distribute:
8176     case OMPD_distribute_simd:
8177     case OMPD_distribute_parallel_for:
8178     case OMPD_distribute_parallel_for_simd:
8179     case OMPD_teams_distribute:
8180     case OMPD_teams_distribute_simd:
8181     case OMPD_teams_distribute_parallel_for:
8182     case OMPD_teams_distribute_parallel_for_simd:
8183     case OMPD_declare_simd:
8184     case OMPD_declare_target:
8185     case OMPD_end_declare_target:
8186     case OMPD_declare_reduction:
8187     case OMPD_taskloop:
8188     case OMPD_taskloop_simd:
8189     case OMPD_target:
8190     case OMPD_target_simd:
8191     case OMPD_target_teams_distribute:
8192     case OMPD_target_teams_distribute_simd:
8193     case OMPD_target_teams_distribute_parallel_for:
8194     case OMPD_target_teams_distribute_parallel_for_simd:
8195     case OMPD_target_teams:
8196     case OMPD_target_parallel:
8197     case OMPD_target_parallel_for:
8198     case OMPD_target_parallel_for_simd:
8199     case OMPD_unknown:
8200       llvm_unreachable("Unexpected standalone target data directive.");
8201       break;
8202     }
8203     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
8204   };
8205 
8206   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
8207                              CodeGenFunction &CGF, PrePostActionTy &) {
8208     // Fill up the arrays with all the mapped variables.
8209     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8210     MappableExprsHandler::MapValuesArrayTy Pointers;
8211     MappableExprsHandler::MapValuesArrayTy Sizes;
8212     MappableExprsHandler::MapFlagsArrayTy MapTypes;
8213 
8214     // Get map clause information.
8215     MappableExprsHandler MEHandler(D, CGF);
8216     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
8217 
8218     TargetDataInfo Info;
8219     // Fill up the arrays and create the arguments.
8220     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8221     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
8222                                  Info.PointersArray, Info.SizesArray,
8223                                  Info.MapTypesArray, Info);
8224     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8225     InputInfo.BasePointersArray =
8226         Address(Info.BasePointersArray, CGM.getPointerAlign());
8227     InputInfo.PointersArray =
8228         Address(Info.PointersArray, CGM.getPointerAlign());
8229     InputInfo.SizesArray =
8230         Address(Info.SizesArray, CGM.getPointerAlign());
8231     MapTypesArray = Info.MapTypesArray;
8232     if (D.hasClausesOfKind<OMPDependClause>())
8233       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8234     else
8235       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8236   };
8237 
8238   if (IfCond) {
8239     emitOMPIfClause(CGF, IfCond, TargetThenGen,
8240                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
8241   } else {
8242     RegionCodeGenTy ThenRCG(TargetThenGen);
8243     ThenRCG(CGF);
8244   }
8245 }
8246 
8247 namespace {
8248   /// Kind of parameter in a function with 'declare simd' directive.
8249   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
8250   /// Attribute set of the parameter.
8251   struct ParamAttrTy {
8252     ParamKindTy Kind = Vector;
8253     llvm::APSInt StrideOrArg;
8254     llvm::APSInt Alignment;
8255   };
8256 } // namespace
8257 
8258 static unsigned evaluateCDTSize(const FunctionDecl *FD,
8259                                 ArrayRef<ParamAttrTy> ParamAttrs) {
8260   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
8261   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
8262   // of that clause. The VLEN value must be power of 2.
8263   // In other case the notion of the function`s "characteristic data type" (CDT)
8264   // is used to compute the vector length.
8265   // CDT is defined in the following order:
8266   //   a) For non-void function, the CDT is the return type.
8267   //   b) If the function has any non-uniform, non-linear parameters, then the
8268   //   CDT is the type of the first such parameter.
8269   //   c) If the CDT determined by a) or b) above is struct, union, or class
8270   //   type which is pass-by-value (except for the type that maps to the
8271   //   built-in complex data type), the characteristic data type is int.
8272   //   d) If none of the above three cases is applicable, the CDT is int.
8273   // The VLEN is then determined based on the CDT and the size of vector
8274   // register of that ISA for which current vector version is generated. The
8275   // VLEN is computed using the formula below:
8276   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
8277   // where vector register size specified in section 3.2.1 Registers and the
8278   // Stack Frame of original AMD64 ABI document.
8279   QualType RetType = FD->getReturnType();
8280   if (RetType.isNull())
8281     return 0;
8282   ASTContext &C = FD->getASTContext();
8283   QualType CDT;
8284   if (!RetType.isNull() && !RetType->isVoidType()) {
8285     CDT = RetType;
8286   } else {
8287     unsigned Offset = 0;
8288     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
8289       if (ParamAttrs[Offset].Kind == Vector)
8290         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
8291       ++Offset;
8292     }
8293     if (CDT.isNull()) {
8294       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
8295         if (ParamAttrs[I + Offset].Kind == Vector) {
8296           CDT = FD->getParamDecl(I)->getType();
8297           break;
8298         }
8299       }
8300     }
8301   }
8302   if (CDT.isNull())
8303     CDT = C.IntTy;
8304   CDT = CDT->getCanonicalTypeUnqualified();
8305   if (CDT->isRecordType() || CDT->isUnionType())
8306     CDT = C.IntTy;
8307   return C.getTypeSize(CDT);
8308 }
8309 
8310 static void
8311 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
8312                            const llvm::APSInt &VLENVal,
8313                            ArrayRef<ParamAttrTy> ParamAttrs,
8314                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
8315   struct ISADataTy {
8316     char ISA;
8317     unsigned VecRegSize;
8318   };
8319   ISADataTy ISAData[] = {
8320       {
8321           'b', 128
8322       }, // SSE
8323       {
8324           'c', 256
8325       }, // AVX
8326       {
8327           'd', 256
8328       }, // AVX2
8329       {
8330           'e', 512
8331       }, // AVX512
8332   };
8333   llvm::SmallVector<char, 2> Masked;
8334   switch (State) {
8335   case OMPDeclareSimdDeclAttr::BS_Undefined:
8336     Masked.push_back('N');
8337     Masked.push_back('M');
8338     break;
8339   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
8340     Masked.push_back('N');
8341     break;
8342   case OMPDeclareSimdDeclAttr::BS_Inbranch:
8343     Masked.push_back('M');
8344     break;
8345   }
8346   for (char Mask : Masked) {
8347     for (const ISADataTy &Data : ISAData) {
8348       SmallString<256> Buffer;
8349       llvm::raw_svector_ostream Out(Buffer);
8350       Out << "_ZGV" << Data.ISA << Mask;
8351       if (!VLENVal) {
8352         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
8353                                          evaluateCDTSize(FD, ParamAttrs));
8354       } else {
8355         Out << VLENVal;
8356       }
8357       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
8358         switch (ParamAttr.Kind){
8359         case LinearWithVarStride:
8360           Out << 's' << ParamAttr.StrideOrArg;
8361           break;
8362         case Linear:
8363           Out << 'l';
8364           if (!!ParamAttr.StrideOrArg)
8365             Out << ParamAttr.StrideOrArg;
8366           break;
8367         case Uniform:
8368           Out << 'u';
8369           break;
8370         case Vector:
8371           Out << 'v';
8372           break;
8373         }
8374         if (!!ParamAttr.Alignment)
8375           Out << 'a' << ParamAttr.Alignment;
8376       }
8377       Out << '_' << Fn->getName();
8378       Fn->addFnAttr(Out.str());
8379     }
8380   }
8381 }
8382 
8383 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
8384                                               llvm::Function *Fn) {
8385   ASTContext &C = CGM.getContext();
8386   FD = FD->getMostRecentDecl();
8387   // Map params to their positions in function decl.
8388   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
8389   if (isa<CXXMethodDecl>(FD))
8390     ParamPositions.try_emplace(FD, 0);
8391   unsigned ParamPos = ParamPositions.size();
8392   for (const ParmVarDecl *P : FD->parameters()) {
8393     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
8394     ++ParamPos;
8395   }
8396   while (FD) {
8397     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
8398       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
8399       // Mark uniform parameters.
8400       for (const Expr *E : Attr->uniforms()) {
8401         E = E->IgnoreParenImpCasts();
8402         unsigned Pos;
8403         if (isa<CXXThisExpr>(E)) {
8404           Pos = ParamPositions[FD];
8405         } else {
8406           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8407                                 ->getCanonicalDecl();
8408           Pos = ParamPositions[PVD];
8409         }
8410         ParamAttrs[Pos].Kind = Uniform;
8411       }
8412       // Get alignment info.
8413       auto NI = Attr->alignments_begin();
8414       for (const Expr *E : Attr->aligneds()) {
8415         E = E->IgnoreParenImpCasts();
8416         unsigned Pos;
8417         QualType ParmTy;
8418         if (isa<CXXThisExpr>(E)) {
8419           Pos = ParamPositions[FD];
8420           ParmTy = E->getType();
8421         } else {
8422           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8423                                 ->getCanonicalDecl();
8424           Pos = ParamPositions[PVD];
8425           ParmTy = PVD->getType();
8426         }
8427         ParamAttrs[Pos].Alignment =
8428             (*NI)
8429                 ? (*NI)->EvaluateKnownConstInt(C)
8430                 : llvm::APSInt::getUnsigned(
8431                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
8432                           .getQuantity());
8433         ++NI;
8434       }
8435       // Mark linear parameters.
8436       auto SI = Attr->steps_begin();
8437       auto MI = Attr->modifiers_begin();
8438       for (const Expr *E : Attr->linears()) {
8439         E = E->IgnoreParenImpCasts();
8440         unsigned Pos;
8441         if (isa<CXXThisExpr>(E)) {
8442           Pos = ParamPositions[FD];
8443         } else {
8444           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8445                                 ->getCanonicalDecl();
8446           Pos = ParamPositions[PVD];
8447         }
8448         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
8449         ParamAttr.Kind = Linear;
8450         if (*SI) {
8451           if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
8452                                     Expr::SE_AllowSideEffects)) {
8453             if (const auto *DRE =
8454                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
8455               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
8456                 ParamAttr.Kind = LinearWithVarStride;
8457                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
8458                     ParamPositions[StridePVD->getCanonicalDecl()]);
8459               }
8460             }
8461           }
8462         }
8463         ++SI;
8464         ++MI;
8465       }
8466       llvm::APSInt VLENVal;
8467       if (const Expr *VLEN = Attr->getSimdlen())
8468         VLENVal = VLEN->EvaluateKnownConstInt(C);
8469       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
8470       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
8471           CGM.getTriple().getArch() == llvm::Triple::x86_64)
8472         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
8473     }
8474     FD = FD->getPreviousDecl();
8475   }
8476 }
8477 
8478 namespace {
8479 /// Cleanup action for doacross support.
8480 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
8481 public:
8482   static const int DoacrossFinArgs = 2;
8483 
8484 private:
8485   llvm::Value *RTLFn;
8486   llvm::Value *Args[DoacrossFinArgs];
8487 
8488 public:
8489   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
8490       : RTLFn(RTLFn) {
8491     assert(CallArgs.size() == DoacrossFinArgs);
8492     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
8493   }
8494   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
8495     if (!CGF.HaveInsertPoint())
8496       return;
8497     CGF.EmitRuntimeCall(RTLFn, Args);
8498   }
8499 };
8500 } // namespace
8501 
8502 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
8503                                        const OMPLoopDirective &D) {
8504   if (!CGF.HaveInsertPoint())
8505     return;
8506 
8507   ASTContext &C = CGM.getContext();
8508   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8509   RecordDecl *RD;
8510   if (KmpDimTy.isNull()) {
8511     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
8512     //  kmp_int64 lo; // lower
8513     //  kmp_int64 up; // upper
8514     //  kmp_int64 st; // stride
8515     // };
8516     RD = C.buildImplicitRecord("kmp_dim");
8517     RD->startDefinition();
8518     addFieldToRecordDecl(C, RD, Int64Ty);
8519     addFieldToRecordDecl(C, RD, Int64Ty);
8520     addFieldToRecordDecl(C, RD, Int64Ty);
8521     RD->completeDefinition();
8522     KmpDimTy = C.getRecordType(RD);
8523   } else {
8524     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
8525   }
8526 
8527   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
8528   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
8529   enum { LowerFD = 0, UpperFD, StrideFD };
8530   // Fill dims with data.
8531   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
8532   // dims.upper = num_iterations;
8533   LValue UpperLVal =
8534       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
8535   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
8536       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
8537       Int64Ty, D.getNumIterations()->getExprLoc());
8538   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
8539   // dims.stride = 1;
8540   LValue StrideLVal =
8541       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
8542   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
8543                         StrideLVal);
8544 
8545   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
8546   // kmp_int32 num_dims, struct kmp_dim * dims);
8547   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
8548                          getThreadID(CGF, D.getLocStart()),
8549                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
8550                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8551                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
8552 
8553   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
8554   CGF.EmitRuntimeCall(RTLFn, Args);
8555   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
8556       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
8557   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
8558   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
8559                                              llvm::makeArrayRef(FiniArgs));
8560 }
8561 
8562 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
8563                                           const OMPDependClause *C) {
8564   QualType Int64Ty =
8565       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8566   const Expr *CounterVal = C->getCounterValue();
8567   assert(CounterVal);
8568   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
8569                                                  CounterVal->getType(), Int64Ty,
8570                                                  CounterVal->getExprLoc());
8571   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
8572   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
8573   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
8574                          getThreadID(CGF, C->getLocStart()),
8575                          CntAddr.getPointer()};
8576   llvm::Value *RTLFn;
8577   if (C->getDependencyKind() == OMPC_DEPEND_source) {
8578     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
8579   } else {
8580     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
8581     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
8582   }
8583   CGF.EmitRuntimeCall(RTLFn, Args);
8584 }
8585 
8586 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
8587                                llvm::Value *Callee,
8588                                ArrayRef<llvm::Value *> Args) const {
8589   assert(Loc.isValid() && "Outlined function call location must be valid.");
8590   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
8591 
8592   if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
8593     if (Fn->doesNotThrow()) {
8594       CGF.EmitNounwindRuntimeCall(Fn, Args);
8595       return;
8596     }
8597   }
8598   CGF.EmitRuntimeCall(Callee, Args);
8599 }
8600 
/// Emit a call to the outlined function \p OutlinedFn with arguments \p Args.
/// This implementation simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
8606 
/// Return the address of the parameter \p NativeParam, looked up as a local
/// variable of \p CGF. \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
8612 
/// This implementation ignores both arguments and always returns
/// Address::invalid().
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  return Address::invalid();
}
8617 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8623 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8629 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8637 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Value *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8645 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8652 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8658 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8663 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8669 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8677 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8684 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8692 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8699 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8705 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8711 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8718 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8724 
/// Not supported in SIMD-only mode: this definition only marks the entry
/// point as unreachable, so it must never be called.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8732 
8733 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
8734                                                llvm::Value *NumThreads,
8735                                                SourceLocation Loc) {
8736   llvm_unreachable("Not supported in SIMD-only mode");
8737 }
8738 
8739 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
8740                                              OpenMPProcBindClauseKind ProcBind,
8741                                              SourceLocation Loc) {
8742   llvm_unreachable("Not supported in SIMD-only mode");
8743 }
8744 
8745 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
8746                                                     const VarDecl *VD,
8747                                                     Address VDAddr,
8748                                                     SourceLocation Loc) {
8749   llvm_unreachable("Not supported in SIMD-only mode");
8750 }
8751 
8752 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
8753     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
8754     CodeGenFunction *CGF) {
8755   llvm_unreachable("Not supported in SIMD-only mode");
8756 }
8757 
8758 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
8759     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
8760   llvm_unreachable("Not supported in SIMD-only mode");
8761 }
8762 
8763 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
8764                                     ArrayRef<const Expr *> Vars,
8765                                     SourceLocation Loc) {
8766   llvm_unreachable("Not supported in SIMD-only mode");
8767 }
8768 
8769 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
8770                                        const OMPExecutableDirective &D,
8771                                        llvm::Value *TaskFunction,
8772                                        QualType SharedsTy, Address Shareds,
8773                                        const Expr *IfCond,
8774                                        const OMPTaskDataTy &Data) {
8775   llvm_unreachable("Not supported in SIMD-only mode");
8776 }
8777 
8778 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
8779     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
8780     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
8781     const Expr *IfCond, const OMPTaskDataTy &Data) {
8782   llvm_unreachable("Not supported in SIMD-only mode");
8783 }
8784 
8785 void CGOpenMPSIMDRuntime::emitReduction(
8786     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
8787     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
8788     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
8789   assert(Options.SimpleReduction && "Only simple reduction is expected.");
8790   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
8791                                  ReductionOps, Options);
8792 }
8793 
8794 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
8795     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
8796     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
8797   llvm_unreachable("Not supported in SIMD-only mode");
8798 }
8799 
8800 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
8801                                                   SourceLocation Loc,
8802                                                   ReductionCodeGen &RCG,
8803                                                   unsigned N) {
8804   llvm_unreachable("Not supported in SIMD-only mode");
8805 }
8806 
8807 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
8808                                                   SourceLocation Loc,
8809                                                   llvm::Value *ReductionsPtr,
8810                                                   LValue SharedLVal) {
8811   llvm_unreachable("Not supported in SIMD-only mode");
8812 }
8813 
8814 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
8815                                            SourceLocation Loc) {
8816   llvm_unreachable("Not supported in SIMD-only mode");
8817 }
8818 
8819 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
8820     CodeGenFunction &CGF, SourceLocation Loc,
8821     OpenMPDirectiveKind CancelRegion) {
8822   llvm_unreachable("Not supported in SIMD-only mode");
8823 }
8824 
8825 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
8826                                          SourceLocation Loc, const Expr *IfCond,
8827                                          OpenMPDirectiveKind CancelRegion) {
8828   llvm_unreachable("Not supported in SIMD-only mode");
8829 }
8830 
8831 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
8832     const OMPExecutableDirective &D, StringRef ParentName,
8833     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
8834     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
8835   llvm_unreachable("Not supported in SIMD-only mode");
8836 }
8837 
8838 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
8839                                          const OMPExecutableDirective &D,
8840                                          llvm::Value *OutlinedFn,
8841                                          llvm::Value *OutlinedFnID,
8842                                          const Expr *IfCond, const Expr *Device) {
8843   llvm_unreachable("Not supported in SIMD-only mode");
8844 }
8845 
8846 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
8847   llvm_unreachable("Not supported in SIMD-only mode");
8848 }
8849 
8850 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
8851   llvm_unreachable("Not supported in SIMD-only mode");
8852 }
8853 
8854 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
8855   return false;
8856 }
8857 
8858 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
8859   return nullptr;
8860 }
8861 
8862 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
8863                                         const OMPExecutableDirective &D,
8864                                         SourceLocation Loc,
8865                                         llvm::Value *OutlinedFn,
8866                                         ArrayRef<llvm::Value *> CapturedVars) {
8867   llvm_unreachable("Not supported in SIMD-only mode");
8868 }
8869 
8870 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
8871                                              const Expr *NumTeams,
8872                                              const Expr *ThreadLimit,
8873                                              SourceLocation Loc) {
8874   llvm_unreachable("Not supported in SIMD-only mode");
8875 }
8876 
8877 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
8878     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8879     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
8880   llvm_unreachable("Not supported in SIMD-only mode");
8881 }
8882 
8883 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
8884     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8885     const Expr *Device) {
8886   llvm_unreachable("Not supported in SIMD-only mode");
8887 }
8888 
8889 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
8890                                            const OMPLoopDirective &D) {
8891   llvm_unreachable("Not supported in SIMD-only mode");
8892 }
8893 
8894 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
8895                                               const OMPDependClause *C) {
8896   llvm_unreachable("Not supported in SIMD-only mode");
8897 }
8898 
8899 const VarDecl *
8900 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
8901                                         const VarDecl *NativeParam) const {
8902   llvm_unreachable("Not supported in SIMD-only mode");
8903 }
8904 
8905 Address
8906 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
8907                                          const VarDecl *NativeParam,
8908                                          const VarDecl *TargetParam) const {
8909   llvm_unreachable("Not supported in SIMD-only mode");
8910 }
8911 
8912