1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitmaskEnum.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/CallSite.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/Value.h"
28 #include "llvm/Support/Format.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 
35 namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// \brief Constructor for regions that capture an explicit statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Constructor for regions without an associated captured statement
  /// (used by inlined regions that reuse the enclosing capture info).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Emit a task switching point for untied tasks. No-op by default;
  /// overridden by task-outlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \brief Return the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \brief Return the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \brief Return the HasCancel flag this region was created with.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
97 
/// \brief API for captured statement code generation in OpenMP constructs
/// outlined into a separate helper function ('parallel' outlined regions).
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// \brief Name of the outlined helper function (client-provided).
  StringRef HelperName;
};
130 
/// \brief API for captured statement code generation in OpenMP 'task'
/// constructs outlined into a separate helper function.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \brief Action that emits the dispatch machinery for untied tasks: a
  /// switch over the task part id that resumes execution at the last
  /// reached switching point.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Variable holding the current task part id; accessed indirectly
    /// through a pointer-typed parameter.
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    /// The dispatch switch; one case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        auto *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes execution at the very start of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// \brief Emit one switching point: store the next part id, run the
    /// untied-task codegen, branch out through cleanups, and register the
    /// resume block as a new case on the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// \brief Number of switching points emitted so far (equals the number
    /// of cases on the dispatch switch).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// \brief Delegate untied switching-point emission to the shared action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218 
/// \brief API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing outlined region
/// (if any).
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// \brief Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// \brief Forward untied switching-point emission to the outer region.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// \brief Return the capture info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// \brief OldCSI downcast to an OpenMP region info, or null if the outer
  /// capture info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
301 
/// \brief API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// \brief Client-provided, application-unique name of the target region.
  StringRef HelperName;
};
330 
/// \brief Codegen callback that must never be invoked; used for regions that
/// wrap expressions only and therefore have no statement body to emit.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
334 /// \brief API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
359         return CGF.EmitLValue(&DRE).getAddress();
360       });
361     }
362     (void)PrivScope.Privatize();
363   }
364 
365   /// \brief Lookup the captured field decl for a variable.
366   const FieldDecl *lookup(const VarDecl *VD) const override {
367     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
368       return FD;
369     return nullptr;
370   }
371 
372   /// \brief Emit the captured statement body.
373   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
374     llvm_unreachable("No body for expressions");
375   }
376 
377   /// \brief Get a variable or parameter for storing global thread id
378   /// inside OpenMP construct.
379   const VarDecl *getThreadIDVariable() const override {
380     llvm_unreachable("No thread id for expressions");
381   }
382 
383   /// \brief Get the name of the capture helper.
384   StringRef getHelperName() const override {
385     llvm_unreachable("No helper name for expressions");
386   }
387 
388   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
389 
390 private:
391   /// Private scope to capture global variables.
392   CodeGenFunction::OMPPrivateScope PrivScope;
393 };
394 
/// \brief RAII for emitting code of OpenMP constructs. On construction,
/// installs a fresh CGOpenMPInlinedRegionInfo and stashes the lambda/block
/// capture state of \p CGF; on destruction, deletes the installed info and
/// restores the saved state.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state of CGF, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    // The inlined region info allocated in the constructor is owned here and
    // deleted before the old info is put back.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
431 
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive.
  /// NOTE: deliberately shares the value 0x40 with OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
460 
/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// Field indices into the ident_t structure shown above.
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
501 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief The default (unordered) schedule is 'static'.
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
533 
/// \brief Identifiers for the OpenMP (kmpc) and offloading (tgt) runtime
/// library entry points used by this file. Each enumerator's comment gives
/// the C signature of the corresponding runtime call.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
717 
718 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
719 /// region.
720 class CleanupTy final : public EHScopeStack::Cleanup {
721   PrePostActionTy *Action;
722 
723 public:
724   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
725   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
726     if (!CGF.HaveInsertPoint())
727       return;
728     Action->Exit(CGF);
729   }
730 };
731 
732 } // anonymous namespace
733 
734 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
735   CodeGenFunction::RunCleanupsScope Scope(CGF);
736   if (PrePostAction) {
737     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
738     Callback(CodeGen, CGF, *PrePostAction);
739   } else {
740     PrePostActionTy Action;
741     Callback(CodeGen, CGF, Action);
742   }
743 }
744 
745 /// Check if the combiner is a call to UDR combiner and if it is so return the
746 /// UDR decl used for reduction.
747 static const OMPDeclareReductionDecl *
748 getReductionInit(const Expr *ReductionOp) {
749   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
750     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
751       if (auto *DRE =
752               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
753         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
754           return DRD;
755   return nullptr;
756 }
757 
/// Emit initialization of \p Private from \p Original for a reduction item,
/// using the user-defined reduction initializer of \p DRD when one exists
/// and a null constant of type \p Ty otherwise.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an 'initializer' clause: call the previously emitted
    // initializer function (second element of the pair).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Both arguments are unary operators over DeclRefExprs (presumably
    // address-of over the omp_priv/omp_orig placeholders — see the UDR
    // machinery in Sema); strip them to reach the placeholder decls.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the placeholder variables onto the actual private/original
    // storage for the duration of the call emission. Scope order matters:
    // the privatization must be active while InitOp is emitted.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee and emit the
    // call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: default-initialize the private copy from a
    // constant global holding the null value of Ty.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the null value using the mechanism matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Store the loaded value into the private copy via an opaque rvalue
    // expression, so EmitAnyExprToMem can handle all evaluation kinds.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
806 
/// \brief Emit element-by-element initialization of an array of complex
/// (non-trivially-initializable) element type.
/// \param DestAddr Address of the array to initialize.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit If true, each element is initialized with
/// the user-defined reduction initializer; otherwise \p Init is the private
/// declaration's own initializer expression.
/// \param Init Initializer expression applied to every element.
/// \param DRD User-defined reduction declaration, or null.
/// \param SrcAddr Address of the original array; only used with \p DRD, where
/// the source element is walked in lockstep with the destination.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  // Skip the loop entirely for zero-length arrays.
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // With a UDR, the source array is walked in lockstep with the destination
  // through a second PHI node.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope per-element cleanups so temporaries are destroyed on each
    // iteration rather than at the end of the whole loop.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge must come from the block current after emitting the body,
  // which may differ from BodyBB if the element init created new blocks.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
895 
896 static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
897 isDeclareTargetDeclaration(const ValueDecl *VD) {
898   for (const auto *D : VD->redecls()) {
899     if (!D->hasAttrs())
900       continue;
901     if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
902       return Attr->getMapType();
903   }
904   return llvm::None;
905 }
906 
/// Emit an lvalue for the shared (original) variable referenced by \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
910 
911 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
912                                             const Expr *E) {
913   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
914     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
915   return LValue();
916 }
917 
918 void ReductionCodeGen::emitAggregateInitialization(
919     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
920     const OMPDeclareReductionDecl *DRD) {
921   // Emit VarDecl with copy init for arrays.
922   // Get the address of the original variable captured in current
923   // captured region.
924   auto *PrivateVD =
925       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
926   bool EmitDeclareReductionInit =
927       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
928   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
929                        EmitDeclareReductionInit,
930                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
931                                                 : PrivateVD->getInit(),
932                        DRD, SharedLVal.getAddress());
933 }
934 
935 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
936                                    ArrayRef<const Expr *> Privates,
937                                    ArrayRef<const Expr *> ReductionOps) {
938   ClausesData.reserve(Shareds.size());
939   SharedAddresses.reserve(Shareds.size());
940   Sizes.reserve(Shareds.size());
941   BaseDecls.reserve(Shareds.size());
942   auto IPriv = Privates.begin();
943   auto IRed = ReductionOps.begin();
944   for (const auto *Ref : Shareds) {
945     ClausesData.emplace_back(Ref, *IPriv, *IRed);
946     std::advance(IPriv, 1);
947     std::advance(IRed, 1);
948   }
949 }
950 
951 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
952   assert(SharedAddresses.size() == N &&
953          "Number of generated lvalues must be exactly N.");
954   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
955   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
956   SharedAddresses.emplace_back(First, Second);
957 }
958 
/// Compute and cache the size of reduction item \p N (in chars, and in
/// elements for variably modified types), then re-emit the private type with
/// its VLA size expression bound to the computed element count.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the size in chars is known from the type alone,
    // and there is no dynamic element count.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type of the shared storage, taken from its pointer type.
  llvm::Type *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - begin) + 1; the section bounds are inclusive.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: take the byte size from the type, derive the element
    // count by exact division.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
996 
/// Re-emit the variably modified private type of reduction item \p N with
/// its VLA size expression bound to the externally supplied \p Size.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized items carry no dynamic element count; nothing to emit.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to Size while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1015 
/// Emit initialization of the private copy of reduction item \p N.
/// Arrays are initialized element-wise; items with a usable UDR initializer
/// go through it; otherwise \p DefaultInit gets a chance first, and finally
/// the private declaration's own non-trivial initializer is emitted.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Null unless the reduction op refers to a user-defined reduction.
  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Reinterpret both addresses with the memory representation of their
  // declared types before emitting any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array: element-by-element initialization.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer (or no private initializer of its own).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit declined; fall back to the declaration's own initializer
    // when it actually does something.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1045 
1046 bool ReductionCodeGen::needCleanups(unsigned N) {
1047   auto *PrivateVD =
1048       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1049   QualType PrivateType = PrivateVD->getType();
1050   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1051   return DTorKind != QualType::DK_none;
1052 }
1053 
1054 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1055                                     Address PrivateAddr) {
1056   auto *PrivateVD =
1057       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1058   QualType PrivateType = PrivateVD->getType();
1059   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1060   if (needCleanups(N)) {
1061     PrivateAddr = CGF.Builder.CreateElementBitCast(
1062         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1063     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1064   }
1065 }
1066 
/// Dereference \p BaseLV through every pointer/reference level of \p BaseTy
/// until reaching \p ElTy, then return an lvalue for that storage with the
/// memory representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel one level of indirection per iteration, stopping once the current
  // type matches the element type we are after.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      // Reference: load through a temporary lvalue of the reference type.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Recast the final address to ElTy's memory type, preserving the lvalue's
  // base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1086 
/// Rebuild the indirection chain of \p BaseTy around the raw pointer
/// \p Addr: for each pointer/reference level down to \p ElTy a temporary is
/// allocated, each temporary storing the address of the next one, with
/// \p Addr stored in the innermost. Returns the outermost temporary, or
/// \p Addr itself (cast to \p BaseLVType) when no level was peeled.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // Mirror of the loop in loadToBegin: allocate one temporary per level of
  // indirection and chain them together.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;  // remember the outermost temporary
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1114 
1115 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1116   const VarDecl *OrigVD = nullptr;
1117   if (auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1118     auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1119     while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1120       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1121     while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1122       Base = TempASE->getBase()->IgnoreParenImpCasts();
1123     DE = cast<DeclRefExpr>(Base);
1124     OrigVD = cast<VarDecl>(DE->getDecl());
1125   } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1126     auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1127     while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1128       Base = TempASE->getBase()->IgnoreParenImpCasts();
1129     DE = cast<DeclRefExpr>(Base);
1130     OrigVD = cast<VarDecl>(DE->getDecl());
1131   }
1132   return OrigVD;
1133 }
1134 
/// Adjust \p PrivateAddr for reduction item \p N so it corresponds to the
/// base variable rather than the section/subscript that was reduced: the
/// private pointer is shifted by the same offset that separates the shared
/// item from its base, and the result is wrapped in the base's indirection
/// chain. Also records the base VarDecl in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    // The reduced expression is a section/subscript of OrigVD.
    BaseDecls.emplace_back(OrigVD);
    auto OriginalBaseLValue = CGF.EmitLValue(DE);
    // Dereference down to the element type of the shared item.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base relative to the reduced item; applying
    // it to the private copy yields the private "base" address.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Re-wrap the shifted pointer in OrigVD's pointer/reference structure.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1160 
1161 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1162   auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1163   return DRD && DRD->getInitializer();
1164 }
1165 
1166 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1167   return CGF.EmitLoadOfPointerLValue(
1168       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1169       getThreadIDVariable()->getType()->castAs<PointerType>());
1170 }
1171 
/// Emit the body of the OpenMP region inside a terminate scope, enforcing
/// the structured-block single-entry/single-exit contract.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  // Nothing can be emitted without a valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // The push/pop pair must bracket the CodeGen call exactly.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1184 
1185 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1186     CodeGenFunction &CGF) {
1187   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1188                             getThreadIDVariable()->getType(),
1189                             AlignmentSource::Decl);
1190 }
1191 
/// Set up per-module OpenMP runtime state: the ident_t and critical-name
/// types used by libomp calls, and any offloading metadata carried over from
/// device bitcode.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  // Matches the kmp.h ident_t layout: four i32 fields plus a source-location
  // string pointer.
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */);
  // kmp_critical_name is an array of 8 i32 in the runtime.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1202 
/// Drop cached state: the map of internally generated runtime variables.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}
1206 
/// Emit the outlined function for a user-defined reduction combiner or
/// initializer: 'void .omp_combiner.(Ty *out, Ty *in)' (or
/// '.omp_initializer.'), with the placeholder variables \p In and \p Out
/// remapped onto the two parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order: out first, then in.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // These helpers are meant to be inlined into their call sites.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Initializer without a CallInit expression: run omp_priv's own
    // declaration initializer into the out parameter's storage.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  // Cleanups must run before FinishFunction seals the body.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1260 
/// Emit (at most once) the combiner and optional initializer functions for
/// the user-defined reduction \p D, caching them in UDRMap. When emitted on
/// behalf of a function (\p CGF non-null), the UDR is also recorded against
/// that function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  // In/Out are lazily initialized identifier caches (presumably members of
  // CGOpenMPRuntime) for the omp_in/omp_out placeholder names.
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    // Likewise lazily cached identifiers for omp_priv/omp_orig.
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    // For direct-init ('omp_priv(...)') the init expression is handled via
    // omp_priv's declaration initializer inside the emitted function, so
    // only CallInit passes the expression through here.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1294 
1295 std::pair<llvm::Function *, llvm::Function *>
1296 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1297   auto I = UDRMap.find(D);
1298   if (I != UDRMap.end())
1299     return I->second;
1300   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1301   return UDRMap.lookup(D);
1302 }
1303 
// Layout information for ident_t.
/// Alignment of an ident_t object: pointer alignment, since its last field
/// (psource) is a pointer.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
/// Total size of ident_t: four i32 fields (16 bytes) plus the psource
/// pointer.
/// NOTE(review): the assert checks that 4 * pointer size is a multiple of
/// the pointer alignment, not the 16-byte i32 prefix that precedes the
/// pointer — presumably equivalent on supported targets; verify.
static CharUnits getIdentSize(CodeGenModule &CGM) {
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
/// Byte offset of field \p Field within ident_t.
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
1316 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
1317                                    IdentFieldIndex Field,
1318                                    const llvm::Twine &Name = "") {
1319   auto Offset = getOffsetOfIdentField(Field);
1320   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
1321 }
1322 
1323 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
1324     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1325     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1326     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1327   assert(ThreadIDVar->getType()->isPointerType() &&
1328          "thread id variable must be of type kmp_int32 *");
1329   CodeGenFunction CGF(CGM, true);
1330   bool HasCancel = false;
1331   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1332     HasCancel = OPD->hasCancel();
1333   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1334     HasCancel = OPSD->hasCancel();
1335   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1336     HasCancel = OPFD->hasCancel();
1337   else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1338     HasCancel = OPFD->hasCancel();
1339   else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1340     HasCancel = OPFD->hasCancel();
1341   else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1342     HasCancel = OPFD->hasCancel();
1343   else if (auto *OPFD =
1344                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1345     HasCancel = OPFD->hasCancel();
1346   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1347                                     HasCancel, OutlinedHelperName);
1348   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1349   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1350 }
1351 
1352 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1353     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1354     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1355   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1356   return emitParallelOrTeamsOutlinedFunction(
1357       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1358 }
1359 
1360 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1361     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1362     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1363   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1364   return emitParallelOrTeamsOutlinedFunction(
1365       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1366 }
1367 
/// Outline the body of a 'task' (or taskloop) directive \p D.
/// \param ThreadIDVar Thread id variable, passed by value (kmp_int32).
/// \param PartIDVar Part-id variable used to resume untied tasks.
/// \param TaskTVar Variable holding the kmp_task_t pointer.
/// \param Tied True for tied tasks; for untied tasks, \p NumberOfParts is
/// set to the number of resumption parts generated.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, each scheduling point re-enqueues the task by calling
  // __kmpc_omp_task(loc, tid, task_t) with the current task descriptor.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  // The action must be attached before CodeGen is invoked during outlining.
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloops capture their statement under OMPD_taskloop, plain tasks under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  auto *CS = D.getCapturedStmt(Region);
  // Only plain 'task' directives track a 'cancel' inside the region.
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful (and only computed) for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1404 
1405 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1406   CharUnits Align = getIdentAlign(CGM);
1407   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1408   if (!Entry) {
1409     if (!DefaultOpenMPPSource) {
1410       // Initialize default location for psource field of ident_t structure of
1411       // all ident_t objects. Format is ";file;function;line;column;;".
1412       // Taken from
1413       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1414       DefaultOpenMPPSource =
1415           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1416       DefaultOpenMPPSource =
1417           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1418     }
1419 
1420     ConstantInitBuilder builder(CGM);
1421     auto fields = builder.beginStruct(IdentTy);
1422     fields.addInt(CGM.Int32Ty, 0);
1423     fields.addInt(CGM.Int32Ty, Flags);
1424     fields.addInt(CGM.Int32Ty, 0);
1425     fields.addInt(CGM.Int32Ty, 0);
1426     fields.add(DefaultOpenMPPSource);
1427     auto DefaultOpenMPLocation =
1428       fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
1429                                    llvm::GlobalValue::PrivateLinkage);
1430     DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
1431 
1432     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1433   }
1434   return Address(Entry, Align);
1435 }
1436 
// Returns an ident_t* describing the given source location, for passing to
// OpenMP runtime entry points. Without debug info (or with an invalid Loc) a
// shared constant default location is used; otherwise a per-function alloca
// is filled in with the actual ";file;function;line;column;;" string.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the per-function location alloca if one was already created.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the alloca once at function entry by copying the default
    // ident_t; only the psource field is rewritten per call site below.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  // Build (and cache, keyed by the raw location encoding) the global string
  // describing this source location.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1494 
// Returns the OpenMP global thread id for the current function, caching it
// per-function in OpenMPLocThreadIDMap. In outlined regions the id is loaded
// from the thread-id argument; otherwise __kmpc_global_thread_num is called
// once at the function's entry block.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        // Loads outside the entry block are intentionally not cached: they
        // would not dominate all later uses in the function.
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  auto *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  Elem.second.ThreadID = Call;
  return Call;
}
1543 
1544 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1545   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1546   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1547     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1548   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1549     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1550       UDRMap.erase(D);
1551     }
1552     FunctionUDRMap.erase(CGF.CurFn);
1553   }
1554 }
1555 
1556 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1557   if (!IdentTy) {
1558   }
1559   return llvm::PointerType::getUnqual(IdentTy);
1560 }
1561 
1562 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1563   if (!Kmpc_MicroTy) {
1564     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1565     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1566                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1567     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1568   }
1569   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1570 }
1571 
1572 llvm::Constant *
1573 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1574   llvm::Constant *RTLFn = nullptr;
1575   switch (static_cast<OpenMPRTLFunction>(Function)) {
1576   case OMPRTL__kmpc_fork_call: {
1577     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1578     // microtask, ...);
1579     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1580                                 getKmpc_MicroPointerTy()};
1581     llvm::FunctionType *FnTy =
1582         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1583     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1584     break;
1585   }
1586   case OMPRTL__kmpc_global_thread_num: {
1587     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1588     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1589     llvm::FunctionType *FnTy =
1590         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1591     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1592     break;
1593   }
1594   case OMPRTL__kmpc_threadprivate_cached: {
1595     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1596     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1597     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1598                                 CGM.VoidPtrTy, CGM.SizeTy,
1599                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1600     llvm::FunctionType *FnTy =
1601         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1602     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1603     break;
1604   }
1605   case OMPRTL__kmpc_critical: {
1606     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1607     // kmp_critical_name *crit);
1608     llvm::Type *TypeParams[] = {
1609         getIdentTyPointerTy(), CGM.Int32Ty,
1610         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1611     llvm::FunctionType *FnTy =
1612         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1613     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1614     break;
1615   }
1616   case OMPRTL__kmpc_critical_with_hint: {
1617     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1618     // kmp_critical_name *crit, uintptr_t hint);
1619     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1620                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1621                                 CGM.IntPtrTy};
1622     llvm::FunctionType *FnTy =
1623         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1624     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1625     break;
1626   }
1627   case OMPRTL__kmpc_threadprivate_register: {
1628     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1629     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1630     // typedef void *(*kmpc_ctor)(void *);
1631     auto KmpcCtorTy =
1632         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1633                                 /*isVarArg*/ false)->getPointerTo();
1634     // typedef void *(*kmpc_cctor)(void *, void *);
1635     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1636     auto KmpcCopyCtorTy =
1637         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1638                                 /*isVarArg*/ false)->getPointerTo();
1639     // typedef void (*kmpc_dtor)(void *);
1640     auto KmpcDtorTy =
1641         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1642             ->getPointerTo();
1643     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1644                               KmpcCopyCtorTy, KmpcDtorTy};
1645     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1646                                         /*isVarArg*/ false);
1647     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1648     break;
1649   }
1650   case OMPRTL__kmpc_end_critical: {
1651     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1652     // kmp_critical_name *crit);
1653     llvm::Type *TypeParams[] = {
1654         getIdentTyPointerTy(), CGM.Int32Ty,
1655         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1656     llvm::FunctionType *FnTy =
1657         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1658     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1659     break;
1660   }
1661   case OMPRTL__kmpc_cancel_barrier: {
1662     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1663     // global_tid);
1664     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1665     llvm::FunctionType *FnTy =
1666         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1667     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1668     break;
1669   }
1670   case OMPRTL__kmpc_barrier: {
1671     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1672     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1673     llvm::FunctionType *FnTy =
1674         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1675     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1676     break;
1677   }
1678   case OMPRTL__kmpc_for_static_fini: {
1679     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1680     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1681     llvm::FunctionType *FnTy =
1682         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1683     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1684     break;
1685   }
1686   case OMPRTL__kmpc_push_num_threads: {
1687     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1688     // kmp_int32 num_threads)
1689     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1690                                 CGM.Int32Ty};
1691     llvm::FunctionType *FnTy =
1692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1693     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1694     break;
1695   }
1696   case OMPRTL__kmpc_serialized_parallel: {
1697     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1698     // global_tid);
1699     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1700     llvm::FunctionType *FnTy =
1701         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1702     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1703     break;
1704   }
1705   case OMPRTL__kmpc_end_serialized_parallel: {
1706     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1707     // global_tid);
1708     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1709     llvm::FunctionType *FnTy =
1710         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1711     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1712     break;
1713   }
1714   case OMPRTL__kmpc_flush: {
1715     // Build void __kmpc_flush(ident_t *loc);
1716     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1717     llvm::FunctionType *FnTy =
1718         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1719     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1720     break;
1721   }
1722   case OMPRTL__kmpc_master: {
1723     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1724     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1725     llvm::FunctionType *FnTy =
1726         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1727     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1728     break;
1729   }
1730   case OMPRTL__kmpc_end_master: {
1731     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1732     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1733     llvm::FunctionType *FnTy =
1734         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1735     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1736     break;
1737   }
1738   case OMPRTL__kmpc_omp_taskyield: {
1739     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1740     // int end_part);
1741     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1742     llvm::FunctionType *FnTy =
1743         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1744     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1745     break;
1746   }
1747   case OMPRTL__kmpc_single: {
1748     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1749     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1750     llvm::FunctionType *FnTy =
1751         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1752     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1753     break;
1754   }
1755   case OMPRTL__kmpc_end_single: {
1756     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1757     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1758     llvm::FunctionType *FnTy =
1759         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1760     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1761     break;
1762   }
1763   case OMPRTL__kmpc_omp_task_alloc: {
1764     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1765     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1766     // kmp_routine_entry_t *task_entry);
1767     assert(KmpRoutineEntryPtrTy != nullptr &&
1768            "Type kmp_routine_entry_t must be created.");
1769     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1770                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1771     // Return void * and then cast to particular kmp_task_t type.
1772     llvm::FunctionType *FnTy =
1773         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1774     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1775     break;
1776   }
1777   case OMPRTL__kmpc_omp_task: {
1778     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1779     // *new_task);
1780     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1781                                 CGM.VoidPtrTy};
1782     llvm::FunctionType *FnTy =
1783         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1784     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1785     break;
1786   }
1787   case OMPRTL__kmpc_copyprivate: {
1788     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1789     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1790     // kmp_int32 didit);
1791     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1792     auto *CpyFnTy =
1793         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1794     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1795                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1796                                 CGM.Int32Ty};
1797     llvm::FunctionType *FnTy =
1798         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1799     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1800     break;
1801   }
1802   case OMPRTL__kmpc_reduce: {
1803     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1804     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1805     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1806     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1807     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1808                                                /*isVarArg=*/false);
1809     llvm::Type *TypeParams[] = {
1810         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1811         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1812         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1813     llvm::FunctionType *FnTy =
1814         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1815     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1816     break;
1817   }
1818   case OMPRTL__kmpc_reduce_nowait: {
1819     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1820     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1821     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1822     // *lck);
1823     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1824     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1825                                                /*isVarArg=*/false);
1826     llvm::Type *TypeParams[] = {
1827         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1828         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1829         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1830     llvm::FunctionType *FnTy =
1831         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1832     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1833     break;
1834   }
1835   case OMPRTL__kmpc_end_reduce: {
1836     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1837     // kmp_critical_name *lck);
1838     llvm::Type *TypeParams[] = {
1839         getIdentTyPointerTy(), CGM.Int32Ty,
1840         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1841     llvm::FunctionType *FnTy =
1842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1843     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1844     break;
1845   }
1846   case OMPRTL__kmpc_end_reduce_nowait: {
1847     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1848     // kmp_critical_name *lck);
1849     llvm::Type *TypeParams[] = {
1850         getIdentTyPointerTy(), CGM.Int32Ty,
1851         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1852     llvm::FunctionType *FnTy =
1853         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1854     RTLFn =
1855         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1856     break;
1857   }
1858   case OMPRTL__kmpc_omp_task_begin_if0: {
1859     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1860     // *new_task);
1861     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1862                                 CGM.VoidPtrTy};
1863     llvm::FunctionType *FnTy =
1864         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1865     RTLFn =
1866         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1867     break;
1868   }
1869   case OMPRTL__kmpc_omp_task_complete_if0: {
1870     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1871     // *new_task);
1872     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1873                                 CGM.VoidPtrTy};
1874     llvm::FunctionType *FnTy =
1875         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1877                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1878     break;
1879   }
1880   case OMPRTL__kmpc_ordered: {
1881     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1882     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1883     llvm::FunctionType *FnTy =
1884         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1885     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1886     break;
1887   }
1888   case OMPRTL__kmpc_end_ordered: {
1889     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1891     llvm::FunctionType *FnTy =
1892         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1893     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1894     break;
1895   }
1896   case OMPRTL__kmpc_omp_taskwait: {
1897     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1898     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899     llvm::FunctionType *FnTy =
1900         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1902     break;
1903   }
1904   case OMPRTL__kmpc_taskgroup: {
1905     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1906     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1907     llvm::FunctionType *FnTy =
1908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1910     break;
1911   }
1912   case OMPRTL__kmpc_end_taskgroup: {
1913     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1914     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1915     llvm::FunctionType *FnTy =
1916         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1917     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1918     break;
1919   }
1920   case OMPRTL__kmpc_push_proc_bind: {
1921     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1922     // int proc_bind)
1923     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1924     llvm::FunctionType *FnTy =
1925         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1926     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1927     break;
1928   }
1929   case OMPRTL__kmpc_omp_task_with_deps: {
1930     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1931     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1932     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1933     llvm::Type *TypeParams[] = {
1934         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1935         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1936     llvm::FunctionType *FnTy =
1937         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1938     RTLFn =
1939         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1940     break;
1941   }
1942   case OMPRTL__kmpc_omp_wait_deps: {
1943     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1944     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1945     // kmp_depend_info_t *noalias_dep_list);
1946     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1947                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1948                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1949     llvm::FunctionType *FnTy =
1950         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1951     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1952     break;
1953   }
1954   case OMPRTL__kmpc_cancellationpoint: {
1955     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1956     // global_tid, kmp_int32 cncl_kind)
1957     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1958     llvm::FunctionType *FnTy =
1959         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1960     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1961     break;
1962   }
1963   case OMPRTL__kmpc_cancel: {
1964     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1965     // kmp_int32 cncl_kind)
1966     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1967     llvm::FunctionType *FnTy =
1968         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1969     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1970     break;
1971   }
1972   case OMPRTL__kmpc_push_num_teams: {
1973     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
1974     // kmp_int32 num_teams, kmp_int32 num_threads)
1975     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1976         CGM.Int32Ty};
1977     llvm::FunctionType *FnTy =
1978         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1979     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1980     break;
1981   }
1982   case OMPRTL__kmpc_fork_teams: {
1983     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1984     // microtask, ...);
1985     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1986                                 getKmpc_MicroPointerTy()};
1987     llvm::FunctionType *FnTy =
1988         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1989     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1990     break;
1991   }
1992   case OMPRTL__kmpc_taskloop: {
1993     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1994     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1995     // sched, kmp_uint64 grainsize, void *task_dup);
1996     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1997                                 CGM.IntTy,
1998                                 CGM.VoidPtrTy,
1999                                 CGM.IntTy,
2000                                 CGM.Int64Ty->getPointerTo(),
2001                                 CGM.Int64Ty->getPointerTo(),
2002                                 CGM.Int64Ty,
2003                                 CGM.IntTy,
2004                                 CGM.IntTy,
2005                                 CGM.Int64Ty,
2006                                 CGM.VoidPtrTy};
2007     llvm::FunctionType *FnTy =
2008         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2009     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2010     break;
2011   }
2012   case OMPRTL__kmpc_doacross_init: {
2013     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2014     // num_dims, struct kmp_dim *dims);
2015     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2016                                 CGM.Int32Ty,
2017                                 CGM.Int32Ty,
2018                                 CGM.VoidPtrTy};
2019     llvm::FunctionType *FnTy =
2020         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2021     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2022     break;
2023   }
2024   case OMPRTL__kmpc_doacross_fini: {
2025     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2026     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2027     llvm::FunctionType *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2030     break;
2031   }
2032   case OMPRTL__kmpc_doacross_post: {
2033     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2034     // *vec);
2035     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2036                                 CGM.Int64Ty->getPointerTo()};
2037     llvm::FunctionType *FnTy =
2038         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2039     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2040     break;
2041   }
2042   case OMPRTL__kmpc_doacross_wait: {
2043     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2044     // *vec);
2045     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2046                                 CGM.Int64Ty->getPointerTo()};
2047     llvm::FunctionType *FnTy =
2048         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2050     break;
2051   }
2052   case OMPRTL__kmpc_task_reduction_init: {
2053     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2054     // *data);
2055     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2056     llvm::FunctionType *FnTy =
2057         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2058     RTLFn =
2059         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2060     break;
2061   }
2062   case OMPRTL__kmpc_task_reduction_get_th_data: {
2063     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2064     // *d);
2065     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2066     llvm::FunctionType *FnTy =
2067         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(
2069         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2070     break;
2071   }
2072   case OMPRTL__tgt_target: {
2073     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2074     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2075     // *arg_types);
2076     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2077                                 CGM.VoidPtrTy,
2078                                 CGM.Int32Ty,
2079                                 CGM.VoidPtrPtrTy,
2080                                 CGM.VoidPtrPtrTy,
2081                                 CGM.SizeTy->getPointerTo(),
2082                                 CGM.Int64Ty->getPointerTo()};
2083     llvm::FunctionType *FnTy =
2084         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2085     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2086     break;
2087   }
2088   case OMPRTL__tgt_target_nowait: {
2089     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2090     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2091     // int64_t *arg_types);
2092     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2093                                 CGM.VoidPtrTy,
2094                                 CGM.Int32Ty,
2095                                 CGM.VoidPtrPtrTy,
2096                                 CGM.VoidPtrPtrTy,
2097                                 CGM.SizeTy->getPointerTo(),
2098                                 CGM.Int64Ty->getPointerTo()};
2099     llvm::FunctionType *FnTy =
2100         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2102     break;
2103   }
2104   case OMPRTL__tgt_target_teams: {
2105     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2106     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2107     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2108     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2109                                 CGM.VoidPtrTy,
2110                                 CGM.Int32Ty,
2111                                 CGM.VoidPtrPtrTy,
2112                                 CGM.VoidPtrPtrTy,
2113                                 CGM.SizeTy->getPointerTo(),
2114                                 CGM.Int64Ty->getPointerTo(),
2115                                 CGM.Int32Ty,
2116                                 CGM.Int32Ty};
2117     llvm::FunctionType *FnTy =
2118         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2119     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2120     break;
2121   }
2122   case OMPRTL__tgt_target_teams_nowait: {
2123     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2124     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2125     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2126     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2127                                 CGM.VoidPtrTy,
2128                                 CGM.Int32Ty,
2129                                 CGM.VoidPtrPtrTy,
2130                                 CGM.VoidPtrPtrTy,
2131                                 CGM.SizeTy->getPointerTo(),
2132                                 CGM.Int64Ty->getPointerTo(),
2133                                 CGM.Int32Ty,
2134                                 CGM.Int32Ty};
2135     llvm::FunctionType *FnTy =
2136         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2137     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2138     break;
2139   }
2140   case OMPRTL__tgt_register_lib: {
2141     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2142     QualType ParamTy =
2143         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2144     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2145     llvm::FunctionType *FnTy =
2146         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2147     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2148     break;
2149   }
2150   case OMPRTL__tgt_unregister_lib: {
2151     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2152     QualType ParamTy =
2153         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2154     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2155     llvm::FunctionType *FnTy =
2156         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2157     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2158     break;
2159   }
2160   case OMPRTL__tgt_target_data_begin: {
2161     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2162     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2163     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2164                                 CGM.Int32Ty,
2165                                 CGM.VoidPtrPtrTy,
2166                                 CGM.VoidPtrPtrTy,
2167                                 CGM.SizeTy->getPointerTo(),
2168                                 CGM.Int64Ty->getPointerTo()};
2169     llvm::FunctionType *FnTy =
2170         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2171     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2172     break;
2173   }
2174   case OMPRTL__tgt_target_data_begin_nowait: {
2175     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2176     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2177     // *arg_types);
2178     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2179                                 CGM.Int32Ty,
2180                                 CGM.VoidPtrPtrTy,
2181                                 CGM.VoidPtrPtrTy,
2182                                 CGM.SizeTy->getPointerTo(),
2183                                 CGM.Int64Ty->getPointerTo()};
2184     auto *FnTy =
2185         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2186     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2187     break;
2188   }
2189   case OMPRTL__tgt_target_data_end: {
2190     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2191     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2192     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2193                                 CGM.Int32Ty,
2194                                 CGM.VoidPtrPtrTy,
2195                                 CGM.VoidPtrPtrTy,
2196                                 CGM.SizeTy->getPointerTo(),
2197                                 CGM.Int64Ty->getPointerTo()};
2198     llvm::FunctionType *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2201     break;
2202   }
2203   case OMPRTL__tgt_target_data_end_nowait: {
2204     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2205     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2206     // *arg_types);
2207     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2208                                 CGM.Int32Ty,
2209                                 CGM.VoidPtrPtrTy,
2210                                 CGM.VoidPtrPtrTy,
2211                                 CGM.SizeTy->getPointerTo(),
2212                                 CGM.Int64Ty->getPointerTo()};
2213     auto *FnTy =
2214         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2215     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2216     break;
2217   }
2218   case OMPRTL__tgt_target_data_update: {
2219     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2220     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2221     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2222                                 CGM.Int32Ty,
2223                                 CGM.VoidPtrPtrTy,
2224                                 CGM.VoidPtrPtrTy,
2225                                 CGM.SizeTy->getPointerTo(),
2226                                 CGM.Int64Ty->getPointerTo()};
2227     llvm::FunctionType *FnTy =
2228         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2230     break;
2231   }
2232   case OMPRTL__tgt_target_data_update_nowait: {
2233     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2234     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2235     // *arg_types);
2236     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2237                                 CGM.Int32Ty,
2238                                 CGM.VoidPtrPtrTy,
2239                                 CGM.VoidPtrPtrTy,
2240                                 CGM.SizeTy->getPointerTo(),
2241                                 CGM.Int64Ty->getPointerTo()};
2242     auto *FnTy =
2243         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2244     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2245     break;
2246   }
2247   }
2248   assert(RTLFn && "Unable to find OpenMP runtime function");
2249   return RTLFn;
2250 }
2251 
2252 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2253                                                              bool IVSigned) {
2254   assert((IVSize == 32 || IVSize == 64) &&
2255          "IV size is not compatible with the omp runtime");
2256   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2257                                        : "__kmpc_for_static_init_4u")
2258                            : (IVSigned ? "__kmpc_for_static_init_8"
2259                                        : "__kmpc_for_static_init_8u");
2260   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2261   auto PtrTy = llvm::PointerType::getUnqual(ITy);
2262   llvm::Type *TypeParams[] = {
2263     getIdentTyPointerTy(),                     // loc
2264     CGM.Int32Ty,                               // tid
2265     CGM.Int32Ty,                               // schedtype
2266     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2267     PtrTy,                                     // p_lower
2268     PtrTy,                                     // p_upper
2269     PtrTy,                                     // p_stride
2270     ITy,                                       // incr
2271     ITy                                        // chunk
2272   };
2273   llvm::FunctionType *FnTy =
2274       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2275   return CGM.CreateRuntimeFunction(FnTy, Name);
2276 }
2277 
2278 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2279                                                             bool IVSigned) {
2280   assert((IVSize == 32 || IVSize == 64) &&
2281          "IV size is not compatible with the omp runtime");
2282   auto Name =
2283       IVSize == 32
2284           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2285           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2286   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2287   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2288                                CGM.Int32Ty,           // tid
2289                                CGM.Int32Ty,           // schedtype
2290                                ITy,                   // lower
2291                                ITy,                   // upper
2292                                ITy,                   // stride
2293                                ITy                    // chunk
2294   };
2295   llvm::FunctionType *FnTy =
2296       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2297   return CGM.CreateRuntimeFunction(FnTy, Name);
2298 }
2299 
2300 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2301                                                             bool IVSigned) {
2302   assert((IVSize == 32 || IVSize == 64) &&
2303          "IV size is not compatible with the omp runtime");
2304   auto Name =
2305       IVSize == 32
2306           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2307           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2308   llvm::Type *TypeParams[] = {
2309       getIdentTyPointerTy(), // loc
2310       CGM.Int32Ty,           // tid
2311   };
2312   llvm::FunctionType *FnTy =
2313       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2314   return CGM.CreateRuntimeFunction(FnTy, Name);
2315 }
2316 
2317 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2318                                                             bool IVSigned) {
2319   assert((IVSize == 32 || IVSize == 64) &&
2320          "IV size is not compatible with the omp runtime");
2321   auto Name =
2322       IVSize == 32
2323           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2324           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2325   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2326   auto PtrTy = llvm::PointerType::getUnqual(ITy);
2327   llvm::Type *TypeParams[] = {
2328     getIdentTyPointerTy(),                     // loc
2329     CGM.Int32Ty,                               // tid
2330     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2331     PtrTy,                                     // p_lower
2332     PtrTy,                                     // p_upper
2333     PtrTy                                      // p_stride
2334   };
2335   llvm::FunctionType *FnTy =
2336       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2337   return CGM.CreateRuntimeFunction(FnTy, Name);
2338 }
2339 
Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
  // No offloading machinery is generated in simd-only mode.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      isDeclareTargetDeclaration(VD);
  // Only variables mapped with 'declare target link' get an indirection
  // pointer; anything else is accessed directly.
  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    // Name of the pointer that stands in for the linked variable:
    // "<mangled-var>_decl_tgt_link_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: lazily create the link pointer global.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        // On the host the pointer is externally visible and is initialized
        // with the address of the original variable.
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      // Keep the pointer alive across optimizations and register it with the
      // offloading entry tables.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    // Note: alignment of the *pointee* declaration is used for the address.
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2368 
2369 llvm::Constant *
2370 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2371   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2372          !CGM.getContext().getTargetInfo().isTLSSupported());
2373   // Lookup the entry, lazily creating it if necessary.
2374   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
2375                                      Twine(CGM.getMangledName(VD)) + ".cache.");
2376 }
2377 
2378 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2379                                                 const VarDecl *VD,
2380                                                 Address VDAddr,
2381                                                 SourceLocation Loc) {
2382   if (CGM.getLangOpts().OpenMPUseTLS &&
2383       CGM.getContext().getTargetInfo().isTLSSupported())
2384     return VDAddr;
2385 
2386   auto VarTy = VDAddr.getElementType();
2387   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2388                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2389                                                        CGM.Int8PtrTy),
2390                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2391                          getOrCreateThreadPrivateCache(VD)};
2392   return Address(CGF.EmitRuntimeCall(
2393       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2394                  VDAddr.getAlignment());
2395 }
2396 
2397 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2398     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2399     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2400   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2401   // library.
2402   auto OMPLoc = emitUpdateLocation(CGF, Loc);
2403   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2404                       OMPLoc);
2405   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2406   // to register constructor/destructor for variable.
2407   llvm::Value *Args[] = {OMPLoc,
2408                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2409                                                        CGM.VoidPtrTy),
2410                          Ctor, CopyCtor, Dtor};
2411   CGF.EmitRuntimeCall(
2412       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2413 }
2414 
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the variable needs no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the registration at most once per variable definition.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives the address of the per-thread copy as a single
      // void* argument.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the incoming void* and reinterpret it as a pointer to the
      // variable's type.
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the declaration's initializer into that storage.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it received.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Like the ctor, the dtor takes the copy's address as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The registration call takes all three function pointers; substitute
    // typed nulls for the ones we did not emit.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a dedicated
      // ".__omp_threadprivate_init_." function and return it to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2531 
2532 /// \brief Obtain information that uniquely identifies a target entry. This
2533 /// consists of the file and device IDs as well as line number associated with
2534 /// the relevant entry source location.
2535 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2536                                      unsigned &DeviceID, unsigned &FileID,
2537                                      unsigned &LineNum) {
2538 
2539   auto &SM = C.getSourceManager();
2540 
2541   // The loc should be always valid and have a file ID (the user cannot use
2542   // #pragma directives in macros)
2543 
2544   assert(Loc.isValid() && "Source location is expected to be always valid.");
2545   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
2546 
2547   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2548   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2549 
2550   llvm::sys::fs::UniqueID ID;
2551   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2552     llvm_unreachable("Source file with target region no longer exists!");
2553 
2554   DeviceID = ID.getDevice();
2555   FileID = ID.getFile();
2556   LineNum = PLoc.getLine();
2557 }
2558 
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Only 'declare target to' variables are handled here; 'link' variables go
  // through the link-pointer mechanism instead.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return false;
  VD = VD->getDefinition(CGM.getContext());
  // Emit ctor/dtor entries only once per definition.
  if (VD && !DeclareTargetWithDefinition.insert(VD).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  // NOTE(review): if getDefinition() returned null, VD is dereferenced below
  // without a check — presumably callers only pass defined variables; confirm.
  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Run the initializer directly into the device-side global.
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // On the host only a placeholder global is needed; it serves as the
      // unique ID matching the device-side ctor entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host-side placeholder global acting as the unique dtor entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2664 
2665 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2666                                                           QualType VarType,
2667                                                           StringRef Name) {
2668   llvm::Twine VarName(Name, ".artificial.");
2669   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2670   llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
2671   llvm::Value *Args[] = {
2672       emitUpdateLocation(CGF, SourceLocation()),
2673       getThreadID(CGF, SourceLocation()),
2674       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2675       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2676                                 /*IsSigned=*/false),
2677       getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2678   return Address(
2679       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2680           CGF.EmitRuntimeCall(
2681               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2682           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2683       CGM.getPointerAlign());
2684 }
2685 
2686 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
2687 /// function. Here is the logic:
2688 /// if (Cond) {
2689 ///   ThenGen();
2690 /// } else {
2691 ///   ElseGen();
2692 /// }
2693 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2694                                       const RegionCodeGenTy &ThenGen,
2695                                       const RegionCodeGenTy &ElseGen) {
2696   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2697 
2698   // If the condition constant folds and can be elided, try to avoid emitting
2699   // the condition and the dead arm of the if/else.
2700   bool CondConstant;
2701   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2702     if (CondConstant)
2703       ThenGen(CGF);
2704     else
2705       ElseGen(CGF);
2706     return;
2707   }
2708 
2709   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2710   // emit the conditional branch.
2711   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
2712   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
2713   auto ContBlock = CGF.createBasicBlock("omp_if.end");
2714   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2715 
2716   // Emit the 'then' code.
2717   CGF.EmitBlock(ThenBlock);
2718   ThenGen(CGF);
2719   CGF.EmitBranch(ContBlock);
2720   // Emit the 'else' code if present.
2721   // There is no need to emit line number for unconditional branch.
2722   (void)ApplyDebugLocation::CreateEmpty(CGF);
2723   CGF.EmitBlock(ElseBlock);
2724   ElseGen(CGF);
2725   // There is no need to emit line number for unconditional branch.
2726   (void)ApplyDebugLocation::CreateEmpty(CGF);
2727   CGF.EmitBranch(ContBlock);
2728   // Emit the continuation block for code after the if.
2729   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2730 }
2731 
/// Emits a call to an outlined 'parallel' region.
///
/// When \a IfCond is null the region is started unconditionally via
/// __kmpc_fork_call. Otherwise an if/else is emitted: the 'then' arm forks,
/// and the 'else' arm runs the outlined function serially on the current
/// thread bracketed by __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The captured variables are appended after the fixed arguments.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    // The second argument is a pointer to a zero-initialized i32 temporary.
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  else {
    // No 'if' clause: always take the forking path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2789 
2790 // If we're inside an (outlined) parallel region, use the region info's
2791 // thread-ID variable (it is passed in a first argument of the outlined function
2792 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2793 // regular serial code region, get thread ID by calling kmp_int32
2794 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2795 // return the address of that temp.
2796 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2797                                              SourceLocation Loc) {
2798   if (auto *OMPRegionInfo =
2799           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2800     if (OMPRegionInfo->getThreadIDVariable())
2801       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2802 
2803   auto ThreadID = getThreadID(CGF, Loc);
2804   auto Int32Ty =
2805       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2806   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2807   CGF.EmitStoreOfScalar(ThreadID,
2808                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2809 
2810   return ThreadIDTemp;
2811 }
2812 
2813 llvm::Constant *
2814 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2815                                              const llvm::Twine &Name) {
2816   SmallString<256> Buffer;
2817   llvm::raw_svector_ostream Out(Buffer);
2818   Out << Name;
2819   auto RuntimeName = Out.str();
2820   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2821   if (Elem.second) {
2822     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2823            "OMP internal variable has different type than requested");
2824     return &*Elem.second;
2825   }
2826 
2827   return Elem.second = new llvm::GlobalVariable(
2828              CGM.getModule(), Ty, /*IsConstant*/ false,
2829              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2830              Elem.first());
2831 }
2832 
2833 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2834   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2835   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2836 }
2837 
2838 namespace {
2839 /// Common pre(post)-action for different OpenMP constructs.
2840 class CommonActionTy final : public PrePostActionTy {
2841   llvm::Value *EnterCallee;
2842   ArrayRef<llvm::Value *> EnterArgs;
2843   llvm::Value *ExitCallee;
2844   ArrayRef<llvm::Value *> ExitArgs;
2845   bool Conditional;
2846   llvm::BasicBlock *ContBlock = nullptr;
2847 
2848 public:
2849   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2850                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2851                  bool Conditional = false)
2852       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2853         ExitArgs(ExitArgs), Conditional(Conditional) {}
2854   void Enter(CodeGenFunction &CGF) override {
2855     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2856     if (Conditional) {
2857       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2858       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2859       ContBlock = CGF.createBasicBlock("omp_if.end");
2860       // Generate the branch (If-stmt)
2861       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2862       CGF.EmitBlock(ThenBlock);
2863     }
2864   }
2865   void Done(CodeGenFunction &CGF) {
2866     // Emit the rest of blocks/branches
2867     CGF.EmitBranch(ContBlock);
2868     CGF.EmitBlock(ContBlock, true);
2869   }
2870   void Exit(CodeGenFunction &CGF) override {
2871     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2872   }
2873 };
2874 } // anonymous namespace
2875 
2876 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2877                                          StringRef CriticalName,
2878                                          const RegionCodeGenTy &CriticalOpGen,
2879                                          SourceLocation Loc, const Expr *Hint) {
2880   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2881   // CriticalOpGen();
2882   // __kmpc_end_critical(ident_t *, gtid, Lock);
2883   // Prepare arguments and build a call to __kmpc_critical
2884   if (!CGF.HaveInsertPoint())
2885     return;
2886   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2887                          getCriticalRegionLock(CriticalName)};
2888   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2889                                                 std::end(Args));
2890   if (Hint) {
2891     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2892         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2893   }
2894   CommonActionTy Action(
2895       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2896                                  : OMPRTL__kmpc_critical),
2897       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2898   CriticalOpGen.setAction(Action);
2899   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2900 }
2901 
2902 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2903                                        const RegionCodeGenTy &MasterOpGen,
2904                                        SourceLocation Loc) {
2905   if (!CGF.HaveInsertPoint())
2906     return;
2907   // if(__kmpc_master(ident_t *, gtid)) {
2908   //   MasterOpGen();
2909   //   __kmpc_end_master(ident_t *, gtid);
2910   // }
2911   // Prepare arguments and build a call to __kmpc_master
2912   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2913   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2914                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2915                         /*Conditional=*/true);
2916   MasterOpGen.setAction(Action);
2917   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2918   Action.Done(CGF);
2919 }
2920 
2921 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2922                                         SourceLocation Loc) {
2923   if (!CGF.HaveInsertPoint())
2924     return;
2925   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2926   llvm::Value *Args[] = {
2927       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2928       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2929   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2930   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2931     Region->emitUntiedSwitch(CGF);
2932 }
2933 
2934 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2935                                           const RegionCodeGenTy &TaskgroupOpGen,
2936                                           SourceLocation Loc) {
2937   if (!CGF.HaveInsertPoint())
2938     return;
2939   // __kmpc_taskgroup(ident_t *, gtid);
2940   // TaskgroupOpGen();
2941   // __kmpc_end_taskgroup(ident_t *, gtid);
2942   // Prepare arguments and build a call to __kmpc_taskgroup
2943   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2944   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2945                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2946                         Args);
2947   TaskgroupOpGen.setAction(Action);
2948   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2949 }
2950 
2951 /// Given an array of pointers to variables, project the address of a
2952 /// given variable.
2953 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2954                                       unsigned Index, const VarDecl *Var) {
2955   // Pull out the pointer to the variable.
2956   Address PtrAddr =
2957       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2958   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2959 
2960   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2961   Addr = CGF.Builder.CreateElementBitCast(
2962       Addr, CGF.ConvertTypeForMem(Var->getType()));
2963   return Addr;
2964 }
2965 
/// Builds the internal ".omp.copyprivate.copy_func" helper used by
/// __kmpc_copyprivate: void copy_func(void *LHSArg, void *RHSArg), where both
/// arguments point to arrays of void* (one slot per copyprivate variable).
/// Each destination element is assigned from the matching source element via
/// the corresponding expression in \p AssignmentOps.
/// NOTE(review): the parameter order here is (DestExprs, SrcExprs) while the
/// call site in emitSingleRegion passes (SrcExprs, DstExprs); the naming
/// appears swapped at one of the two sites — verify against the single
/// directive's clause accessors before renaming anything.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Project element I out of each array and give it the declared type of
    // the corresponding variable.
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3013 
/// Emits an OpenMP 'single' region, including copyprivate support.
/// The four expression arrays must be parallel (asserted below): one entry
/// per copyprivate variable.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is only needed when there are copyprivate variables: it records
  // whether this thread was the one that executed the single region.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed to parameters named
    // (DestExprs, SrcExprs) in the callee — the names appear swapped at one
    // of the two sites; confirm against the clause accessors before renaming.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3094 
3095 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3096                                         const RegionCodeGenTy &OrderedOpGen,
3097                                         SourceLocation Loc, bool IsThreads) {
3098   if (!CGF.HaveInsertPoint())
3099     return;
3100   // __kmpc_ordered(ident_t *, gtid);
3101   // OrderedOpGen();
3102   // __kmpc_end_ordered(ident_t *, gtid);
3103   // Prepare arguments and build a call to __kmpc_ordered
3104   if (IsThreads) {
3105     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3106     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3107                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3108                           Args);
3109     OrderedOpGen.setAction(Action);
3110     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3111     return;
3112   }
3113   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3114 }
3115 
3116 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3117                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3118                                       bool ForceSimpleCall) {
3119   if (!CGF.HaveInsertPoint())
3120     return;
3121   // Build call __kmpc_cancel_barrier(loc, thread_id);
3122   // Build call __kmpc_barrier(loc, thread_id);
3123   unsigned Flags;
3124   if (Kind == OMPD_for)
3125     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3126   else if (Kind == OMPD_sections)
3127     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3128   else if (Kind == OMPD_single)
3129     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3130   else if (Kind == OMPD_barrier)
3131     Flags = OMP_IDENT_BARRIER_EXPL;
3132   else
3133     Flags = OMP_IDENT_BARRIER_IMPL;
3134   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3135   // thread_id);
3136   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3137                          getThreadID(CGF, Loc)};
3138   if (auto *OMPRegionInfo =
3139           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3140     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3141       auto *Result = CGF.EmitRuntimeCall(
3142           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3143       if (EmitChecks) {
3144         // if (__kmpc_cancel_barrier()) {
3145         //   exit from construct;
3146         // }
3147         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3148         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3149         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3150         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3151         CGF.EmitBlock(ExitBB);
3152         //   exit from construct;
3153         auto CancelDestination =
3154             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3155         CGF.EmitBranchThroughCleanup(CancelDestination);
3156         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3157       }
3158       return;
3159     }
3160   }
3161   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3162 }
3163 
3164 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
3165 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3166                                           bool Chunked, bool Ordered) {
3167   switch (ScheduleKind) {
3168   case OMPC_SCHEDULE_static:
3169     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3170                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3171   case OMPC_SCHEDULE_dynamic:
3172     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3173   case OMPC_SCHEDULE_guided:
3174     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3175   case OMPC_SCHEDULE_runtime:
3176     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3177   case OMPC_SCHEDULE_auto:
3178     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3179   case OMPC_SCHEDULE_unknown:
3180     assert(!Chunked && "chunk was specified but schedule kind not known");
3181     return Ordered ? OMP_ord_static : OMP_sch_static;
3182   }
3183   llvm_unreachable("Unexpected runtime schedule");
3184 }
3185 
3186 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
3187 static OpenMPSchedType
3188 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3189   // only static is allowed for dist_schedule
3190   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3191 }
3192 
3193 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3194                                          bool Chunked) const {
3195   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3196   return Schedule == OMP_sch_static;
3197 }
3198 
3199 bool CGOpenMPRuntime::isStaticNonchunked(
3200     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3201   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3202   return Schedule == OMP_dist_sch_static;
3203 }
3204 
3205 
3206 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3207   auto Schedule =
3208       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3209   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3210   return Schedule != OMP_sch_static;
3211 }
3212 
3213 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3214                                   OpenMPScheduleClauseModifier M1,
3215                                   OpenMPScheduleClauseModifier M2) {
3216   int Modifier = 0;
3217   switch (M1) {
3218   case OMPC_SCHEDULE_MODIFIER_monotonic:
3219     Modifier = OMP_sch_modifier_monotonic;
3220     break;
3221   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3222     Modifier = OMP_sch_modifier_nonmonotonic;
3223     break;
3224   case OMPC_SCHEDULE_MODIFIER_simd:
3225     if (Schedule == OMP_sch_static_chunked)
3226       Schedule = OMP_sch_static_balanced_chunked;
3227     break;
3228   case OMPC_SCHEDULE_MODIFIER_last:
3229   case OMPC_SCHEDULE_MODIFIER_unknown:
3230     break;
3231   }
3232   switch (M2) {
3233   case OMPC_SCHEDULE_MODIFIER_monotonic:
3234     Modifier = OMP_sch_modifier_monotonic;
3235     break;
3236   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3237     Modifier = OMP_sch_modifier_nonmonotonic;
3238     break;
3239   case OMPC_SCHEDULE_MODIFIER_simd:
3240     if (Schedule == OMP_sch_static_chunked)
3241       Schedule = OMP_sch_static_balanced_chunked;
3242     break;
3243   case OMPC_SCHEDULE_MODIFIER_last:
3244   case OMPC_SCHEDULE_MODIFIER_unknown:
3245     break;
3246   }
3247   return Schedule | Modifier;
3248 }
3249 
/// Emits the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop. Non-ordered static schedules are not allowed here (see
/// the assert below); those go through emitForStaticInit instead.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3281 
/// Emits the actual call to a __kmpc_for_static_init_* entry point.
/// Only static-family schedules are valid here (asserted below), and
/// Values.Ordered must be false; ordered/dynamic loops use the dispatch-init
/// path instead. The IL/LB/UB/ST addresses in \p Values are passed by
/// pointer so the runtime can fill in this thread's bounds.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3330 
3331 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3332                                         SourceLocation Loc,
3333                                         OpenMPDirectiveKind DKind,
3334                                         const OpenMPScheduleTy &ScheduleKind,
3335                                         const StaticRTInput &Values) {
3336   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3337       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3338   assert(isOpenMPWorksharingDirective(DKind) &&
3339          "Expected loop-based or sections-based directive.");
3340   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3341                                              isOpenMPLoopDirective(DKind)
3342                                                  ? OMP_IDENT_WORK_LOOP
3343                                                  : OMP_IDENT_WORK_SECTIONS);
3344   auto *ThreadId = getThreadID(CGF, Loc);
3345   auto *StaticInitFunction =
3346       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3347   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3348                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3349 }
3350 
3351 void CGOpenMPRuntime::emitDistributeStaticInit(
3352     CodeGenFunction &CGF, SourceLocation Loc,
3353     OpenMPDistScheduleClauseKind SchedKind,
3354     const CGOpenMPRuntime::StaticRTInput &Values) {
3355   OpenMPSchedType ScheduleNum =
3356       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3357   auto *UpdatedLocation =
3358       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3359   auto *ThreadId = getThreadID(CGF, Loc);
3360   auto *StaticInitFunction =
3361       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3362   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3363                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3364                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3365 }
3366 
3367 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3368                                           SourceLocation Loc,
3369                                           OpenMPDirectiveKind DKind) {
3370   if (!CGF.HaveInsertPoint())
3371     return;
3372   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3373   llvm::Value *Args[] = {
3374       emitUpdateLocation(CGF, Loc,
3375                          isOpenMPDistributeDirective(DKind)
3376                              ? OMP_IDENT_WORK_DISTRIBUTE
3377                              : isOpenMPLoopDirective(DKind)
3378                                    ? OMP_IDENT_WORK_LOOP
3379                                    : OMP_IDENT_WORK_SECTIONS),
3380       getThreadID(CGF, Loc)};
3381   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3382                       Args);
3383 }
3384 
3385 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3386                                                  SourceLocation Loc,
3387                                                  unsigned IVSize,
3388                                                  bool IVSigned) {
3389   if (!CGF.HaveInsertPoint())
3390     return;
3391   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3392   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3393   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3394 }
3395 
3396 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3397                                           SourceLocation Loc, unsigned IVSize,
3398                                           bool IVSigned, Address IL,
3399                                           Address LB, Address UB,
3400                                           Address ST) {
3401   // Call __kmpc_dispatch_next(
3402   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3403   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3404   //          kmp_int[32|64] *p_stride);
3405   llvm::Value *Args[] = {
3406       emitUpdateLocation(CGF, Loc),
3407       getThreadID(CGF, Loc),
3408       IL.getPointer(), // &isLastIter
3409       LB.getPointer(), // &Lower
3410       UB.getPointer(), // &Upper
3411       ST.getPointer()  // &Stride
3412   };
3413   llvm::Value *Call =
3414       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3415   return CGF.EmitScalarConversion(
3416       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
3417       CGF.getContext().BoolTy, Loc);
3418 }
3419 
3420 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3421                                            llvm::Value *NumThreads,
3422                                            SourceLocation Loc) {
3423   if (!CGF.HaveInsertPoint())
3424     return;
3425   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3426   llvm::Value *Args[] = {
3427       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3428       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3429   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3430                       Args);
3431 }
3432 
3433 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3434                                          OpenMPProcBindClauseKind ProcBind,
3435                                          SourceLocation Loc) {
3436   if (!CGF.HaveInsertPoint())
3437     return;
3438   // Constants for proc bind value accepted by the runtime.
3439   enum ProcBindTy {
3440     ProcBindFalse = 0,
3441     ProcBindTrue,
3442     ProcBindMaster,
3443     ProcBindClose,
3444     ProcBindSpread,
3445     ProcBindIntel,
3446     ProcBindDefault
3447   } RuntimeProcBind;
3448   switch (ProcBind) {
3449   case OMPC_PROC_BIND_master:
3450     RuntimeProcBind = ProcBindMaster;
3451     break;
3452   case OMPC_PROC_BIND_close:
3453     RuntimeProcBind = ProcBindClose;
3454     break;
3455   case OMPC_PROC_BIND_spread:
3456     RuntimeProcBind = ProcBindSpread;
3457     break;
3458   case OMPC_PROC_BIND_unknown:
3459     llvm_unreachable("Unsupported proc_bind value.");
3460   }
3461   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3462   llvm::Value *Args[] = {
3463       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3464       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3465   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3466 }
3467 
3468 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3469                                 SourceLocation Loc) {
3470   if (!CGF.HaveInsertPoint())
3471     return;
3472   // Build call void __kmpc_flush(ident_t *loc)
3473   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3474                       emitUpdateLocation(CGF, Loc));
3475 }
3476 
namespace {
/// \brief Indexes of fields for type kmp_task_t. The order must match the
/// layout used when the kmp_task_t record is built elsewhere in this file.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  Data1,
  /// \brief Task priority.
  Data2,
  /// \brief (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// \brief (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// \brief (Taskloops only) Stride.
  KmpTaskTStride,
  /// \brief (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// \brief (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3502 
3503 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3504   return OffloadEntriesTargetRegion.empty() &&
3505          OffloadEntriesDeviceGlobalVar.empty();
3506 }
3507 
3508 /// \brief Initialize target region entry.
3509 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3510     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3511                                     StringRef ParentName, unsigned LineNum,
3512                                     unsigned Order) {
3513   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3514                                              "only required for the device "
3515                                              "code generation.");
3516   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3517       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3518                                    OMPTargetRegionEntryTargetRegion);
3519   ++OffloadingEntriesNum;
3520 }
3521 
3522 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3523     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3524                                   StringRef ParentName, unsigned LineNum,
3525                                   llvm::Constant *Addr, llvm::Constant *ID,
3526                                   OMPTargetRegionEntryKind Flags) {
3527   // If we are emitting code for a target, the entry is already initialized,
3528   // only has to be registered.
3529   if (CGM.getLangOpts().OpenMPIsDevice) {
3530     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3531            "Entry must exist.");
3532     auto &Entry =
3533         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3534     assert(Entry.isValid() && "Entry not initialized!");
3535     Entry.setAddress(Addr);
3536     Entry.setID(ID);
3537     Entry.setFlags(Flags);
3538   } else {
3539     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3540     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3541     ++OffloadingEntriesNum;
3542   }
3543 }
3544 
3545 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3546     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3547     unsigned LineNum) const {
3548   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3549   if (PerDevice == OffloadEntriesTargetRegion.end())
3550     return false;
3551   auto PerFile = PerDevice->second.find(FileID);
3552   if (PerFile == PerDevice->second.end())
3553     return false;
3554   auto PerParentName = PerFile->second.find(ParentName);
3555   if (PerParentName == PerFile->second.end())
3556     return false;
3557   auto PerLine = PerParentName->second.find(LineNum);
3558   if (PerLine == PerParentName->second.end())
3559     return false;
3560   // Fail if this entry is already registered.
3561   if (PerLine->second.getAddress() || PerLine->second.getID())
3562     return false;
3563   return true;
3564 }
3565 
3566 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3567     const OffloadTargetRegionEntryInfoActTy &Action) {
3568   // Scan all target region entries and perform the provided action.
3569   for (const auto &D : OffloadEntriesTargetRegion)
3570     for (const auto &F : D.second)
3571       for (const auto &P : F.second)
3572         for (const auto &L : P.second)
3573           Action(D.first, F.first, P.first(), L.first, L.second);
3574 }
3575 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  // Device-only: seed a declare-target global variable entry (address, size
  // and linkage are filled in later by registerDeviceGlobalVarEntryInfo).
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3586 
3587 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3588     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3589                                      CharUnits VarSize,
3590                                      OMPTargetGlobalVarEntryKind Flags,
3591                                      llvm::GlobalValue::LinkageTypes Linkage) {
3592   if (CGM.getLangOpts().OpenMPIsDevice) {
3593     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3594     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3595            "Entry not initialized!");
3596     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3597            "Resetting with the new address.");
3598     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
3599       return;
3600     Entry.setAddress(Addr);
3601     Entry.setVarSize(VarSize);
3602     Entry.setLinkage(Linkage);
3603   } else {
3604     if (hasDeviceGlobalVarEntryInfo(VarName))
3605       return;
3606     OffloadEntriesDeviceGlobalVar.try_emplace(
3607         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3608     ++OffloadingEntriesNum;
3609   }
3610 }
3611 
3612 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3613     actOnDeviceGlobalVarEntriesInfo(
3614         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3615   // Scan all target region entries and perform the provided action.
3616   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3617     Action(E.getKey(), E.getValue());
3618 }
3619 
/// \brief Create the function that registers the offloading binary descriptor
/// with the runtime (__tgt_register_lib) and schedules its unregistration
/// (__tgt_unregister_lib) at program exit. Returns nullptr when emitting
/// device code or when there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // Get list of devices we care about
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);

  // One __tgt_device_image per target triple; the img_start/img_end symbols
  // are external and resolved by the linker against the embedded device code.
  for (llvm::Triple Device : Devices) {
    StringRef T = Device.getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(T));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));

    auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
    Dev.add(ImgBegin);
    Dev.add(ImgEnd);
    Dev.add(HostEntriesBegin);
    Dev.add(HostEntriesEnd);
    Dev.finishAndAddTo(DeviceImagesEntries);
  }

  // Create device images global array.
  llvm::GlobalVariable *DeviceImages =
    DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
                                              CGM.getPointerAlign(),
                                              /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  ConstantInitBuilder DescBuilder(CGM);
  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
  DescInit.addInt(CGM.Int32Ty, Devices.size());
  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                                    DeviceImages,
                                                    Index));
  DescInit.add(HostEntriesBegin);
  DescInit.add(HostEntriesEnd);

  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
                                              CGM.getPointerAlign(),
                                              /*isConstant=*/true);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // .omp_offloading.descriptor_unreg(void*): calls __tgt_unregister_lib(Desc).
  // It takes a dummy void* so it can be registered through registerGlobalDtor
  // below.
  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    auto FTy = CGM.getTypes().GetFunctionType(FI);
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(
        FTy, ".omp_offloading.descriptor_unreg", FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  // .omp_offloading.descriptor_reg(): calls __tgt_register_lib(Desc) and
  // arranges for UnRegFn to run at program exit.
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(
        FTy, ".omp_offloading.descriptor_reg", FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
3758 
/// \brief Emit one constant __tgt_offload_entry (addr, name, size, flags,
/// reserved) into the ".omp_offloading.entries" section, where the linker
/// gathers the host entries table delimited by entries_begin/entries_end.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  // The entry is named after the symbol it describes.
  StringRef Name = Addr->getName();
  auto *TgtOffloadEntryType = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
  llvm::LLVMContext &C = CGM.getModule().getContext();
  llvm::Module &M = CGM.getModule();

  // Make sure the address has the right type.
  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  llvm::GlobalVariable *Str =
      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
                               ".omp_offloading.entry_name");
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);

  // We can't have any padding between symbols, so we need to have 1-byte
  // alignment.
  auto Align = CharUnits::fromQuantity(1);

  // Create the entry struct: { addr, name, size, flags, reserved }.
  ConstantInitBuilder EntryBuilder(CGM);
  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
  EntryInit.add(AddrPtr);
  EntryInit.add(StrPtr);
  EntryInit.addInt(CGM.SizeTy, Size);
  EntryInit.addInt(CGM.Int32Ty, Flags);
  EntryInit.addInt(CGM.Int32Ty, 0); // reserved field
  llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal(
      Twine(".omp_offloading.entry.", Name), Align,
      /*Constant=*/true, Linkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection(".omp_offloading.entries");
}
3800 
/// \brief Emit the host offload entries (via createOffloadEntry) and the
/// "omp_offload.info" named metadata that the device-side compilation later
/// reads back in loadOffloadInfoMetadata().
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are later emitted in creation order, indexed by their Order field.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [&C](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the host entries table in creation order.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      // Target regions have size 0; the entry's "addr" is the region ID.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      assert(CE->getAddress() && "Entry Addr is invalid!");
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), CE->getFlags(),
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3909 
/// \brief Loads all the offload entries information from the host IR
/// metadata (the "omp_offload.info" named metadata node produced by
/// createOffloadEntriesAndInfoMetadata()). Device-only; a missing or
/// unreadable host IR file is silently ignored.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  // Parse the host IR into a throwaway module in a private context.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read an integer or string operand of the metadata node.
    auto GetMDInt = [MN](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto GetMDString = [MN](unsigned Idx) {
      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it (see
    // the emitters in createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3971 
3972 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3973   if (!KmpRoutineEntryPtrTy) {
3974     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3975     auto &C = CGM.getContext();
3976     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3977     FunctionProtoType::ExtProtoInfo EPI;
3978     KmpRoutineEntryPtrQTy = C.getPointerType(
3979         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3980     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3981   }
3982 }
3983 
3984 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3985                                        QualType FieldTy) {
3986   auto *Field = FieldDecl::Create(
3987       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3988       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3989       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3990   Field->setAccess(AS_public);
3991   DC->addDecl(Field);
3992   return Field;
3993 }
3994 
3995 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3996 
3997   // Make sure the type of the entry is already created. This is the type we
3998   // have to create:
3999   // struct __tgt_offload_entry{
4000   //   void      *addr;       // Pointer to the offload entry info.
4001   //                          // (function or global)
4002   //   char      *name;       // Name of the function or global.
4003   //   size_t     size;       // Size of the entry info (0 if it a function).
4004   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4005   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4006   // };
4007   if (TgtOffloadEntryQTy.isNull()) {
4008     ASTContext &C = CGM.getContext();
4009     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
4010     RD->startDefinition();
4011     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4012     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4013     addFieldToRecordDecl(C, RD, C.getSizeType());
4014     addFieldToRecordDecl(
4015         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4016     addFieldToRecordDecl(
4017         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4018     RD->completeDefinition();
4019     RD->addAttr(PackedAttr::CreateImplicit(C));
4020     TgtOffloadEntryQTy = C.getRecordType(RD);
4021   }
4022   return TgtOffloadEntryQTy;
4023 }
4024 
4025 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4026   // These are the types we need to build:
4027   // struct __tgt_device_image{
4028   // void   *ImageStart;       // Pointer to the target code start.
4029   // void   *ImageEnd;         // Pointer to the target code end.
4030   // // We also add the host entries to the device image, as it may be useful
4031   // // for the target runtime to have access to that information.
4032   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4033   //                                       // the entries.
4034   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4035   //                                       // entries (non inclusive).
4036   // };
4037   if (TgtDeviceImageQTy.isNull()) {
4038     ASTContext &C = CGM.getContext();
4039     auto *RD = C.buildImplicitRecord("__tgt_device_image");
4040     RD->startDefinition();
4041     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4042     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4043     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4044     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4045     RD->completeDefinition();
4046     TgtDeviceImageQTy = C.getRecordType(RD);
4047   }
4048   return TgtDeviceImageQTy;
4049 }
4050 
4051 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4052   // struct __tgt_bin_desc{
4053   //   int32_t              NumDevices;      // Number of devices supported.
4054   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4055   //                                         // (one per device).
4056   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4057   //                                         // entries.
4058   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4059   //                                         // entries (non inclusive).
4060   // };
4061   if (TgtBinaryDescriptorQTy.isNull()) {
4062     ASTContext &C = CGM.getContext();
4063     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
4064     RD->startDefinition();
4065     addFieldToRecordDecl(
4066         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4067     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4068     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4069     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4070     RD->completeDefinition();
4071     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4072   }
4073   return TgtBinaryDescriptorQTy;
4074 }
4075 
4076 namespace {
4077 struct PrivateHelpersTy {
4078   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4079                    const VarDecl *PrivateElemInit)
4080       : Original(Original), PrivateCopy(PrivateCopy),
4081         PrivateElemInit(PrivateElemInit) {}
4082   const VarDecl *Original;
4083   const VarDecl *PrivateCopy;
4084   const VarDecl *PrivateElemInit;
4085 };
4086 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4087 } // anonymous namespace
4088 
4089 static RecordDecl *
4090 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4091   if (!Privates.empty()) {
4092     auto &C = CGM.getContext();
4093     // Build struct .kmp_privates_t. {
4094     //         /*  private vars  */
4095     //       };
4096     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
4097     RD->startDefinition();
4098     for (auto &&Pair : Privates) {
4099       auto *VD = Pair.second.Original;
4100       auto Type = VD->getType();
4101       Type = Type.getNonReferenceType();
4102       auto *FD = addFieldToRecordDecl(C, RD, Type);
4103       if (VD->hasAttrs()) {
4104         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4105              E(VD->getAttrs().end());
4106              I != E; ++I)
4107           FD->addAttr(*I);
4108       }
4109     }
4110     RD->completeDefinition();
4111     return RD;
4112   }
4113   return nullptr;
4114 }
4115 
4116 static RecordDecl *
4117 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4118                          QualType KmpInt32Ty,
4119                          QualType KmpRoutineEntryPointerQTy) {
4120   auto &C = CGM.getContext();
4121   // Build struct kmp_task_t {
4122   //         void *              shareds;
4123   //         kmp_routine_entry_t routine;
4124   //         kmp_int32           part_id;
4125   //         kmp_cmplrdata_t data1;
4126   //         kmp_cmplrdata_t data2;
4127   // For taskloops additional fields:
4128   //         kmp_uint64          lb;
4129   //         kmp_uint64          ub;
4130   //         kmp_int64           st;
4131   //         kmp_int32           liter;
4132   //         void *              reductions;
4133   //       };
4134   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4135   UD->startDefinition();
4136   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4137   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4138   UD->completeDefinition();
4139   QualType KmpCmplrdataTy = C.getRecordType(UD);
4140   auto *RD = C.buildImplicitRecord("kmp_task_t");
4141   RD->startDefinition();
4142   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4143   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4144   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4145   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4146   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4147   if (isOpenMPTaskLoopDirective(Kind)) {
4148     QualType KmpUInt64Ty =
4149         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4150     QualType KmpInt64Ty =
4151         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4152     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4153     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4154     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4155     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4156     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4157   }
4158   RD->completeDefinition();
4159   return RD;
4160 }
4161 
4162 static RecordDecl *
4163 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4164                                      ArrayRef<PrivateDataTy> Privates) {
4165   auto &C = CGM.getContext();
4166   // Build struct kmp_task_t_with_privates {
4167   //         kmp_task_t task_data;
4168   //         .kmp_privates_t. privates;
4169   //       };
4170   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4171   RD->startDefinition();
4172   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4173   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
4174     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4175   }
4176   RD->completeDefinition();
4177   return RD;
4178 }
4179 
/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// The generated function has the entry-point signature the runtime expects
/// and simply unpacks the fields of the task descriptor into the argument
/// list of \p TaskFunction.
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Build the parameter list: (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the dereferenced task argument; Base is its first field, the
  // embedded kmp_task_t descriptor.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (its pointer), not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the pointer type the outlined
  // task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field exists only when there are privatized variables; pass
  // a null pointer otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally receive the bounds, stride, last-iteration flag
  // and reductions pointer loaded from the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    auto *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    auto *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    auto StLVal = CGF.EmitLValueForField(Base, *StFI);
    auto *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
    auto *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    auto RLVal = CGF.EmitLValueForField(Base, *RFI);
    auto *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime ignores the result value; always return 0.
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4292 
/// \brief Emit the task destructor thunk
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt);
/// \endcode
/// which pushes a destroy cleanup for every field of the privates record
/// embedded in the task descriptor whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  // Build the parameter list: (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Step to the privates record (the second field of the task descriptor)
  // and push a destroy cleanup for each field that needs destruction; the
  // cleanups run when the function finishes.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4336 
4337 /// \brief Emit a privates mapping function for correct handling of private and
4338 /// firstprivate variables.
4339 /// \code
4340 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4341 /// **noalias priv1,...,  <tyn> **noalias privn) {
4342 ///   *priv1 = &.privates.priv1;
4343 ///   ...;
4344 ///   *privn = &.privates.privn;
4345 /// }
4346 /// \endcode
4347 static llvm::Value *
4348 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4349                                ArrayRef<const Expr *> PrivateVars,
4350                                ArrayRef<const Expr *> FirstprivateVars,
4351                                ArrayRef<const Expr *> LastprivateVars,
4352                                QualType PrivatesQTy,
4353                                ArrayRef<PrivateDataTy> Privates) {
4354   auto &C = CGM.getContext();
4355   FunctionArgList Args;
4356   ImplicitParamDecl TaskPrivatesArg(
4357       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4358       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4359       ImplicitParamDecl::Other);
4360   Args.push_back(&TaskPrivatesArg);
4361   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4362   unsigned Counter = 1;
4363   for (auto *E: PrivateVars) {
4364     Args.push_back(ImplicitParamDecl::Create(
4365         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4366         C.getPointerType(C.getPointerType(E->getType()))
4367             .withConst()
4368             .withRestrict(),
4369         ImplicitParamDecl::Other));
4370     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4371     PrivateVarsPos[VD] = Counter;
4372     ++Counter;
4373   }
4374   for (auto *E : FirstprivateVars) {
4375     Args.push_back(ImplicitParamDecl::Create(
4376         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4377         C.getPointerType(C.getPointerType(E->getType()))
4378             .withConst()
4379             .withRestrict(),
4380         ImplicitParamDecl::Other));
4381     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4382     PrivateVarsPos[VD] = Counter;
4383     ++Counter;
4384   }
4385   for (auto *E: LastprivateVars) {
4386     Args.push_back(ImplicitParamDecl::Create(
4387         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4388         C.getPointerType(C.getPointerType(E->getType()))
4389             .withConst()
4390             .withRestrict(),
4391         ImplicitParamDecl::Other));
4392     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4393     PrivateVarsPos[VD] = Counter;
4394     ++Counter;
4395   }
4396   auto &TaskPrivatesMapFnInfo =
4397       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4398   auto *TaskPrivatesMapTy =
4399       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4400   auto *TaskPrivatesMap = llvm::Function::Create(
4401       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
4402       ".omp_task_privates_map.", &CGM.getModule());
4403   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4404                                     TaskPrivatesMapFnInfo);
4405   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4406   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4407   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4408   CodeGenFunction CGF(CGM);
4409   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4410                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4411 
4412   // *privi = &.privates.privi;
4413   LValue Base = CGF.EmitLoadOfPointerLValue(
4414       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4415       TaskPrivatesArg.getType()->castAs<PointerType>());
4416   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4417   Counter = 0;
4418   for (auto *Field : PrivatesQTyRD->fields()) {
4419     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
4420     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4421     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4422     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4423         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4424     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4425     ++Counter;
4426   }
4427   CGF.FinishFunction();
4428   return TaskPrivatesMap;
4429 }
4430 
4431 static bool stable_sort_comparator(const PrivateDataTy P1,
4432                                    const PrivateDataTy P2) {
4433   return P1.first > P2.first;
4434 }
4435 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block of the task (may be
///        invalid if there is nothing to copy from).
/// \param TDBase LValue of the kmp_task_t_with_privates object being filled.
/// \param SharedsTy / SharedsPtrTy Type of the captured shareds record and a
///        pointer to it, used to reinterpret KmpTaskSharedsPtr.
/// \param ForDup true when emitting inside the taskloop duplication function,
///        where only non-trivial constructor initializers must be re-run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lockstep with the Privates list; both
  // were built in the same (alignment-sorted) order.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    // In the duplication function only non-trivial constructor calls need to
    // be re-emitted; everything else was already copied with the task data.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a firstprivate variable that must be
      // initialized from the captured shared original.
      if (auto *Elem = Pair.second.PrivateElemInit) {
        auto *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = OriginalVD->getType();
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind the init helper to the shared
          // address and run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    ++FI;
  }
}
4539 
4540 /// Check if duplication function is required for taskloops.
4541 static bool checkInitIsRequired(CodeGenFunction &CGF,
4542                                 ArrayRef<PrivateDataTy> Privates) {
4543   bool InitRequired = false;
4544   for (auto &&Pair : Privates) {
4545     auto *VD = Pair.second.PrivateCopy;
4546     auto *Init = VD->getAnyInitializer();
4547     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4548                                     !CGF.isTrivialInitializer(Init));
4549   }
4550   return InitRequired;
4551 }
4552 
4553 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  auto &C = CGM.getContext();
  // Build the parameter list: (task_dst, task_src, lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  auto *TaskDup =
      llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_dup.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds block.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4631 
4632 /// Checks if destructor function is required to be generated.
4633 /// \return true if cleanups are required, false otherwise.
4634 static bool
4635 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4636   bool NeedsCleanup = false;
4637   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4638   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4639   for (auto *FD : PrivateRD->fields()) {
4640     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4641     if (NeedsCleanup)
4642       break;
4643   }
4644   return NeedsCleanup;
4645 }
4646 
/// Emits everything needed to create (but not enqueue) a task for the given
/// task-generating directive: builds the kmp_task_t record specialized with
/// this task's privates, emits the helper functions the runtime requires
/// (proxy task entry, privates-mapping function, destructors, and — for
/// taskloop — the task-duplication function), calls
/// __kmpc_omp_task_alloc, copies the captured shareds into the new task, and
/// initializes private copies and the priority/destructor union fields.
/// The pieces callers need to actually launch the task are returned in a
/// TaskResultTy.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Firstprivates additionally carry the initializer expression used to copy
  // the original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  // Lastprivates (taskloop) have no element initializer here; their final
  // value is propagated via the task-duplication/lastprivate machinery.
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Stable sort (comparator defined earlier in this file) so that relative
  // source order is preserved among entries that compare equal.
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record (extra bounds/stride/reductions fields), cached separately
  // from the plain task record.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; its type dictates the cast/null-constant below.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map to the proxy function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Tell the runtime it must invoke the destructors thunk when any private
    // copy has a non-trivial destructor.
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // 'final' may be a runtime condition (select at runtime) or a constant
  // known at compile time (fold into the flags directly).
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase is the embedded kmp_task_t (first field of the with-privates
  // record).
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop with lastprivates (or privates needing init) also needs the
    // task-duplication thunk so child task copies are initialized correctly.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4843 
/// Emits code to create and enqueue a task for a '#pragma omp task'-style
/// directive. Builds the task via emitTaskInit, materializes the dependence
/// array (if 'depend' clauses are present), and then either enqueues the
/// task with __kmpc_omp_task[_with_deps] (then-branch) or, when an 'if'
/// clause evaluates to false, executes it immediately and undeferred via
/// __kmpc_omp_task_begin_if0 / proxy entry / __kmpc_omp_task_complete_if0
/// (else-branch), waiting on dependences first.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Lazily build (and cache in KmpDependInfoTy) the record type
    // struct kmp_depend_info { intptr_t base_addr; size_t len; flags; };
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Data.Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: length = (upper-bound element + 1) - lower bound,
        // computed as a pointer difference in bytes.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array temp to a void* pointing at its first element for the
    // runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch of the 'if' clause (or unconditional path): enqueue the task
  // for deferred execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Else-branch of the 'if' clause: execute the task body immediately in the
  // encountering thread (undeferred), bracketed by begin_if0/complete_if0.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  else {
    // No 'if' clause: always take the deferred-enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5034 
/// Emits code for a '#pragma omp taskloop'-style directive: creates the task
/// via emitTaskInit, stores the loop bounds/stride and the reductions pointer
/// into the kmp_task_t record, and calls __kmpc_taskloop with the if-value,
/// grainsize/num_tasks schedule and optional task-duplication thunk.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  // Unlike 'task', taskloop passes the 'if' condition to the runtime as an
  // integer argument instead of branching in the generated code.
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);

  // Initialize the lower bound, upper bound and stride fields of the task
  // record from the directive's loop variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions)
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kind argument values for __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getNullValue(
          CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5114 
/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by atomic/update-style reductions; may be null).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (RHS) and destination (LHS) element;
  // incoming value from the preheader is the array start, the back-edge
  // value is set after the increment below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen's
  // expression (written in terms of the variables) operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5194 
5195 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5196 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5197 /// UDR combiner function.
5198 static void emitReductionCombiner(CodeGenFunction &CGF,
5199                                   const Expr *ReductionOp) {
5200   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
5201     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5202       if (auto *DRE =
5203               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5204         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5205           std::pair<llvm::Function *, llvm::Function *> Reduction =
5206               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5207           RValue Func = RValue::get(Reduction.first);
5208           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5209           CGF.EmitIgnoredExpr(ReductionOp);
5210           return;
5211         }
5212   CGF.EmitIgnoredExpr(ReductionOp);
5213 }
5214 
/// Emits the function
///   void .omp.reduction.reduction_func(void *LHSArg, void *RHSArg);
/// where both arguments are arrays of pointers to the reduction variables,
/// and the body performs, for each reduction i:
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// \param ArgsType LLVM type of the pointer-array both arguments are cast to.
/// \return The emitted reduction function.
llvm::Value *CGOpenMPRuntime::emitReductionFunction(
    CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map each LHS/RHS variable to the corresponding slot of the pointer
  // arrays so the reduction expressions below resolve to the right storage.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA reductions pass the array size in the next slot of the pointer
      // array, so consume an extra index here.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
      auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; array-typed privates (array sections) get an
  // element-wise loop, everything else combines directly.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (auto *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5300 
5301 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5302                                                   const Expr *ReductionOp,
5303                                                   const Expr *PrivateRef,
5304                                                   const DeclRefExpr *LHS,
5305                                                   const DeclRefExpr *RHS) {
5306   if (PrivateRef->getType()->isArrayType()) {
5307     // Emit reduction for array section.
5308     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5309     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5310     EmitOMPAggregateReduction(
5311         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5312         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5313           emitReductionCombiner(CGF, ReductionOp);
5314         });
5315   } else
5316     // Emit reduction for array subscript or single variable.
5317     emitReductionCombiner(CGF, ReductionOp);
5318 }
5319 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  // WithNowait selects the __kmpc_reduce_nowait/__kmpc_end_reduce_nowait
  // entry points; SimpleReduction skips the runtime protocol entirely.
  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls needed: just combine each private copy into the
    // original variable directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx can run ahead of I: VLA items occupy two slots (pointer + size).
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      // The VLA element count is smuggled through the void* slot as an
      // inttoptr value; reduce_func converts it back with ptrtoint.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic path: this thread owns the reduction, combine sequentially.
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The exit action emits the __kmpc_end_reduce{_nowait} call on every exit
  // path (including exceptional ones) out of the combiner region.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Atomic path: each item is combined atomically when the op maps onto a
  // simple atomic update, otherwise inside a critical region.
  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update expr>' into target and update parts.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // The update callback re-evaluates UpExpr with the LHS
                // variable temporarily bound to the loaded atomic value.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5616 
5617 /// Generates unique name for artificial threadprivate variables.
5618 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5619 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5620                                       const Expr *Ref) {
5621   SmallString<256> Buffer;
5622   llvm::raw_svector_ostream Out(Buffer);
5623   const clang::DeclRefExpr *DE;
5624   const VarDecl *D = ::getBaseDecl(Ref, DE);
5625   if (!D)
5626     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5627   D = D->getCanonicalDecl();
5628   Out << Prefix << "."
5629       << (D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D))
5630       << "_" << D->getCanonicalDecl()->getLocStart().getRawEncoding();
5631   return Out.str();
5632 }
5633 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  auto &C = CGM.getContext();
  // Single void* parameter: pointer to the private copy to initialize.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_init.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    // The original item's address was stashed in a threadprivate global by
    // emitTaskReductionFixups; load it back here.
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: the shared lvalue is never dereferenced, so a
    // null placeholder is sufficient.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5698 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  auto &C = CGM.getContext();
  // The sema-built LHS/RHS placeholder variables referenced by ReductionOp;
  // they get remapped below onto the function's two arguments.
  auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // arg0 = in/out (accumulator), arg1 = in (value to fold in).
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_comb.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5774 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups (the runtime
/// accepts a null finalizer in that case).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  auto &C = CGM.getContext();
  // Single void* parameter: pointer to the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_fini.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
5821 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one kmp_task_red_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    // SizeVal is non-null only when the item size is not a compile-time
    // constant (VLAs, array sections).
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null finalizer tells the runtime no cleanup is needed.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;  (flags = 1 requests lazy/delayed creation)
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
5926 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant; Sizes.second holds the runtime size value in that case
  // (it is nullptr for constant-sized items). The initializer/combiner/
  // finalizer functions read the size back from this global.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  // The runtime does not pass it to the init routine, so it is smuggled
  // through a threadprivate global read back in emitReduceInitFunction.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
5953 
5954 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5955                                               SourceLocation Loc,
5956                                               llvm::Value *ReductionsPtr,
5957                                               LValue SharedLVal) {
5958   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5959   // *d);
5960   llvm::Value *Args[] = {
5961       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5962                                 /*isSigned=*/true),
5963       ReductionsPtr,
5964       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
5965                                                       CGM.VoidPtrTy)};
5966   return Address(
5967       CGF.EmitRuntimeCall(
5968           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
5969       SharedLVal.getAlignment());
5970 }
5971 
5972 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
5973                                        SourceLocation Loc) {
5974   if (!CGF.HaveInsertPoint())
5975     return;
5976   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5977   // global_tid);
5978   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
5979   // Ignore return result until untied tasks are supported.
5980   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
5981   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5982     Region->emitUntiedSwitch(CGF);
5983 }
5984 
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Push an inlined region info for InnerKind for the duration of EmitBody;
  // the body is emitted in place, no outlined function is created here.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
5994 
namespace {
/// Kinds of cancellation requests. The numeric values match the 'cncl_kind'
/// argument passed to the __kmpc_cancel and __kmpc_cancellationpoint runtime
/// entry points emitted below.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6004 
6005 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6006   RTCancelKind CancelKind = CancelNoreq;
6007   if (CancelRegion == OMPD_parallel)
6008     CancelKind = CancelParallel;
6009   else if (CancelRegion == OMPD_for)
6010     CancelKind = CancelLoop;
6011   else if (CancelRegion == OMPD_sections)
6012     CancelKind = CancelSections;
6013   else {
6014     assert(CancelRegion == OMPD_taskgroup);
6015     CancelKind = CancelTaskgroup;
6016   }
6017   return CancelKind;
6018 }
6019 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was requested: branch to the
      // exit block and leave the construct through its cleanups.
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6054 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the actual cancel call; with an 'if' clause it is only
    // emitted on the true branch of the condition.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      auto &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was activated: branch to the
      // exit block and leave the construct through its cleanups.
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6096 
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");

  // Defer to the shared helper that outlines the target region and, if it is
  // an offload entry, registers it with the offload-entries manager.
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6106 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement into a function with the entry name
  // computed above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
    OutlinedFn->setDSOLocal(false);
  } else
    // On the host the ID is a private constant byte whose address is unique.
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6171 
6172 /// discard all CompoundStmts intervening between two constructs
6173 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
6174   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
6175     Body = CS->body_front();
6176 
6177   return Body;
6178 }
6179 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                               CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  auto &Bld = CGF.Builder;

  // If the target directive is combined with a teams directive:
  //   Return the value in the num_teams clause, if any.
  //   Otherwise, return 0 to denote the runtime default.
  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
    if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
                                         /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }

    // The default value is 0.
    return Bld.getInt32(0);
  }

  // If the target directive is combined with a parallel directive but not a
  // teams directive, start one team.
  if (isOpenMPParallelDirective(D.getDirectiveKind()))
    return Bld.getInt32(1);

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
        // Emit the clause expression with the captures of the enclosing
        // target region mapped in.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                                 /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no num_teams clause we use
      // the default value 0.
      return Bld.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}
6248 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                 CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  auto &Bld = CGF.Builder;

  //
  // If the target directive is combined with a teams directive:
  //   Return the value in the thread_limit clause, if any.
  //
  // If the target directive is combined with a parallel directive:
  //   Return the value in the num_threads clause, if any.
  //
  // If both clauses are set, select the minimum of the two.
  //
  // If neither teams nor parallel combined directives set the number of threads
  // in a team, return 0 to denote the runtime default.
  //
  // If this is not a teams directive return nullptr.

  if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
      isOpenMPParallelDirective(D.getDirectiveKind())) {
    // 0 tells the runtime to use its default thread count.
    llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
    llvm::Value *NumThreadsVal = nullptr;
    llvm::Value *ThreadLimitVal = nullptr;

    if (const auto *ThreadLimitClause =
            D.getSingleClause<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
                                            /*IgnoreResultAssign*/ true);
      ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
    }

    if (const auto *NumThreadsClause =
            D.getSingleClause<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      llvm::Value *NumThreads =
          CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                             /*IgnoreResultAssign*/ true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
    }

    // Select the lesser of thread_limit and num_threads.
    if (NumThreadsVal)
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;

    // Set default value passed to the runtime if either teams or a target
    // parallel type directive is found but no clause is specified.
    if (!ThreadLimitVal)
      ThreadLimitVal = DefaultThreadLimitVal;

    return ThreadLimitVal;
  }

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
        // Emit the clause expression with the captures of the enclosing
        // target region mapped in.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no thread_limit clause we
      // use the default value 0.
      return CGF.Builder.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}
6351 
6352 namespace {
6353 // \brief Utility to handle information from clauses associated with a given
6354 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6355 // It provides a convenient interface to obtain the information and generate
6356 // code for that information.
6357 class MappableExprsHandler {
6358 public:
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading. The flags are OR-ed together to form the map-type argument
  /// passed to the offloading runtime.
  /// NOTE(review): presumably these must stay in sync with the offloading
  /// runtime's map-type flag values — confirm against libomptarget.
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// \brief The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// \brief This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// \brief Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// \brief This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// \brief Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
  };
6390 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer value.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Retrieve the stored base pointer.
    llvm::Value *operator*() const { return Ptr; }
    /// Retrieve the device pointer declaration, if any.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associate a device pointer declaration with this base pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
6407 
6408   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
6409   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
6410   typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;
6411 
private:
  /// \brief Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// \brief Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// \brief Set of all first private variables in the current directive.
  /// Filled from the directive's firstprivate clauses in the constructor.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
  /// Set of all reduction variables in the current directive. Filled from
  /// the directive's reduction clauses in the constructor.
  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null. Populated from the
  /// directive's is_device_ptr clauses in the constructor.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
6430 
  /// Compute the size in bytes of the object referenced by \p E, as an
  /// llvm::Value. Array sections are sized from the section length rather
  /// than from the expression's (built-in) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    auto ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee (pointer base) or element type
      // (array base).
      llvm::Value *ElemSize;
      if (auto *PTy = BaseTy->getAs<PointerType>())
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      else {
        auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // size = length * sizeof(element).
      auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }
6472 
6473   /// \brief Return the corresponding bits for a given map clause modifier. Add
6474   /// a flag marking the map as a pointer if requested. Add a flag marking the
6475   /// map as the first one of a series of maps that relate to the same map
6476   /// expression.
6477   uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
6478                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
6479                           bool AddIsTargetParamFlag) const {
6480     uint64_t Bits = 0u;
6481     switch (MapType) {
6482     case OMPC_MAP_alloc:
6483     case OMPC_MAP_release:
6484       // alloc and release is the default behavior in the runtime library,  i.e.
6485       // if we don't pass any bits alloc/release that is what the runtime is
6486       // going to do. Therefore, we don't need to signal anything for these two
6487       // type modifiers.
6488       break;
6489     case OMPC_MAP_to:
6490       Bits = OMP_MAP_TO;
6491       break;
6492     case OMPC_MAP_from:
6493       Bits = OMP_MAP_FROM;
6494       break;
6495     case OMPC_MAP_tofrom:
6496       Bits = OMP_MAP_TO | OMP_MAP_FROM;
6497       break;
6498     case OMPC_MAP_delete:
6499       Bits = OMP_MAP_DELETE;
6500       break;
6501     default:
6502       llvm_unreachable("Unexpected map type!");
6503       break;
6504     }
6505     if (AddPtrFlag)
6506       Bits |= OMP_MAP_PTR_AND_OBJ;
6507     if (AddIsTargetParamFlag)
6508       Bits |= OMP_MAP_TARGET_PARAM;
6509     if (MapTypeModifier == OMPC_MAP_always)
6510       Bits |= OMP_MAP_ALWAYS;
6511     return Bits;
6512   }
6513 
  /// \brief Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    auto *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                         OASE->getBase()->IgnoreParenImpCasts())
                         .getCanonicalType();
      if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    llvm::APSInt ConstLength;
    if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
      return true; // Can have a size greater than 1.

    return ConstLength.getSExtValue() != 1;
  }
6551 
6552   /// \brief Return the adjusted map modifiers if the declaration a capture
6553   /// refers to appears in a first-private clause. This is expected to be used
6554   /// only with directives that start with 'target'.
6555   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
6556                                                unsigned CurrentModifiers) {
6557     assert(Cap.capturesVariable() && "Expected capture by reference only!");
6558 
6559     // A first private variable captured by reference will use only the
6560     // 'private ptr' and 'map to' flag. Return the right flags if the captured
6561     // declaration is known as first-private in this handler.
6562     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
6563       return MappableExprsHandler::OMP_MAP_PRIVATE |
6564              MappableExprsHandler::OMP_MAP_TO;
6565     // Reduction variable  will use only the 'private ptr' and 'map to_from'
6566     // flag.
6567     if (ReductionDecls.count(Cap.getCapturedVar())) {
6568       return MappableExprsHandler::OMP_MAP_TO |
6569              MappableExprsHandler::OMP_MAP_FROM;
6570     }
6571 
6572     // We didn't modify anything.
6573     return CurrentModifiers;
6574   }
6575 
public:
  /// Build the handler for directive \p Dir, pre-collecting the declaration
  /// sets the per-capture flag adjustments rely on.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information: record the canonical decl of
    // each firstprivate variable.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Likewise record the canonical decl of each reduction variable.
    for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
      for (const auto *D : C->varlists()) {
        ReductionDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }
6595 
  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// The four output arrays are parallel: each generated map appends exactly
  /// one entry to \a BasePointers, \a Pointers, \a Sizes and \a Types.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList, bool IsImplicit) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22], s.a, s.b)
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            isDeclareTargetDeclaration(VD)) {
          assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
                 "Declare target link is expected.");
          // Avoid warning in release build.
          (void)*Res;
          IsLink = true;
          // For "declare target link" variables the base is the link pointer
          // emitted for the variable, not the variable itself.
          BP = CGF.CGM.getOpenMPRuntime()
                   .getAddrOfDeclareTargetLink(VD)
                   .getPointer();
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         Ty->castAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Implicit maps carry the OMP_MAP_IMPLICIT bit in every generated entry.
    uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        llvm::Value *LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        // If we have a member expression and the current component is a
        // reference, we have to map the reference too. Whenever we have a
        // reference, the section that reference refers to is going to be a
        // load instruction from the storage assigned to the reference.
        if (isa<MemberExpr>(I->getAssociatedExpression()) &&
            I->getAssociatedDeclaration()->getType()->isReferenceType()) {
          auto *LI = cast<llvm::LoadInst>(LB);
          auto *RefAddr = LI->getPointerOperand();

          // Map the reference storage itself as a pointer-sized 'alloc'.
          BasePointers.push_back(BP);
          Pointers.push_back(RefAddr);
          Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
          Types.push_back(DefaultFlags |
                          getMapTypeBits(
                              /*MapType*/ OMPC_MAP_alloc,
                              /*MapTypeModifier=*/OMPC_MAP_unknown,
                              !IsExpressionFirstInfo, IsCaptureFirstInfo));
          IsExpressionFirstInfo = false;
          IsCaptureFirstInfo = false;
          // The reference will be the next base address.
          BP = RefAddr;
        }

        BasePointers.push_back(BP);
        Pointers.push_back(LB);
        Sizes.push_back(Size);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        Types.push_back(DefaultFlags |
                        getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo || IsLink,
                                       IsCaptureFirstInfo && !IsLink));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
6861 
  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  ///
  /// Clears the four output arrays, gathers components from all map/to/from
  /// clauses of the current directive, merges in use_device_ptr information,
  /// and finally delegates per-list emission to generateInfoForComponentList.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // Bundles one component list with the clause information needed to
    // generate its maps.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
      OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
      ReturnPointerKind ReturnDevicePointer = RPK_None;
      bool IsImplicit = false;

      MapInfo() = default;
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. The 'this'
    // capture is keyed by a null declaration.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect component lists from map clauses, and from to/from clauses of
    // 'target update' (which behave like 'to'/'from' maps).
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None, C->isImplicit());
      }
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(IE),
                                                      IE->getExprLoc());
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }

    // Emit the maps, one declaration's worth of component lists at a time.
    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
            Sizes, Types, IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }
    }
  }
7024 
7025   /// \brief Generate the base pointers, section pointers, sizes and map types
7026   /// associated to a given capture.
7027   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
7028                               llvm::Value *Arg,
7029                               MapBaseValuesArrayTy &BasePointers,
7030                               MapValuesArrayTy &Pointers,
7031                               MapValuesArrayTy &Sizes,
7032                               MapFlagsArrayTy &Types) const {
7033     assert(!Cap->capturesVariableArrayType() &&
7034            "Not expecting to generate map info for a variable array type!");
7035 
7036     BasePointers.clear();
7037     Pointers.clear();
7038     Sizes.clear();
7039     Types.clear();
7040 
7041     // We need to know when we generating information for the first component
7042     // associated with a capture, because the mapping flags depend on it.
7043     bool IsFirstComponentList = true;
7044 
7045     const ValueDecl *VD =
7046         Cap->capturesThis()
7047             ? nullptr
7048             : Cap->getCapturedVar()->getCanonicalDecl();
7049 
7050     // If this declaration appears in a is_device_ptr clause we just have to
7051     // pass the pointer by value. If it is a reference to a declaration, we just
7052     // pass its value, otherwise, if it is a member expression, we need to map
7053     // 'to' the field.
7054     if (!VD) {
7055       auto It = DevPointersMap.find(VD);
7056       if (It != DevPointersMap.end()) {
7057         for (auto L : It->second) {
7058           generateInfoForComponentList(
7059               /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
7060               BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
7061               /*IsImplicit=*/false);
7062           IsFirstComponentList = false;
7063         }
7064         return;
7065       }
7066     } else if (DevPointersMap.count(VD)) {
7067       BasePointers.push_back({Arg, VD});
7068       Pointers.push_back(Arg);
7069       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
7070       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
7071       return;
7072     }
7073 
7074     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7075     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7076       for (auto L : C->decl_component_lists(VD)) {
7077         assert(L.first == VD &&
7078                "We got information for the wrong declaration??");
7079         assert(!L.second.empty() &&
7080                "Not expecting declaration with no component lists.");
7081         generateInfoForComponentList(
7082             C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
7083             Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
7084         IsFirstComponentList = false;
7085       }
7086 
7087     return;
7088   }
7089 
7090   /// \brief Generate the default map information for a given capture \a CI,
7091   /// record field declaration \a RI and captured value \a CV.
7092   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
7093                               const FieldDecl &RI, llvm::Value *CV,
7094                               MapBaseValuesArrayTy &CurBasePointers,
7095                               MapValuesArrayTy &CurPointers,
7096                               MapValuesArrayTy &CurSizes,
7097                               MapFlagsArrayTy &CurMapTypes) {
7098 
7099     // Do the default mapping.
7100     if (CI.capturesThis()) {
7101       CurBasePointers.push_back(CV);
7102       CurPointers.push_back(CV);
7103       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
7104       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
7105       // Default map type.
7106       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
7107     } else if (CI.capturesVariableByCopy()) {
7108       CurBasePointers.push_back(CV);
7109       CurPointers.push_back(CV);
7110       if (!RI.getType()->isAnyPointerType()) {
7111         // We have to signal to the runtime captures passed by value that are
7112         // not pointers.
7113         CurMapTypes.push_back(OMP_MAP_LITERAL);
7114         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
7115       } else {
7116         // Pointers are implicitly mapped with a zero size and no flags
7117         // (other than first map that is added for all implicit maps).
7118         CurMapTypes.push_back(0u);
7119         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
7120       }
7121     } else {
7122       assert(CI.capturesVariable() && "Expected captured reference.");
7123       CurBasePointers.push_back(CV);
7124       CurPointers.push_back(CV);
7125 
7126       const ReferenceType *PtrTy =
7127           cast<ReferenceType>(RI.getType().getTypePtr());
7128       QualType ElementType = PtrTy->getPointeeType();
7129       CurSizes.push_back(CGF.getTypeSize(ElementType));
7130       // The default map type for a scalar/complex type is 'to' because by
7131       // default the value doesn't have to be retrieved. For an aggregate
7132       // type, the default is 'tofrom'.
7133       CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
7134           CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
7135                                              : OMP_MAP_TO));
7136     }
7137     // Every default map produces a single argument which is a target parameter.
7138     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
7139   }
7140 };
7141 
/// \brief Device IDs reserved by the offloading runtime interface; these
/// values must stay in sync with the runtime library's expectations.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
7147 } // anonymous namespace
7148 
/// \brief Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param BasePointers, Pointers, Sizes, MapTypes Parallel arrays, one entry
///        per map, previously produced by the mappable-expression handler.
/// \param Info Receives the emitted arrays and the number of pointers.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  auto &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (auto *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and section pointers are always filled at runtime, so
    // they get stack temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (auto S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, ".offload_sizes");
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, ".offload_maptypes");
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if runtime-evaluated) size into
    // its slot of the corresponding array.
    for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
      llvm::Value *BPVal = *BasePointers[i];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, i);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where the device pointer for this declaration (if any) was
      // stored, so it can be retrieved later.
      if (Info.requiresDevicePointerInfo())
        if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));

      llvm::Value *PVal = Pointers[i];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, i);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/i);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
7260 /// \brief Emit the arguments to be passed to the runtime library based on the
7261 /// arrays of pointers, sizes and map types.
7262 static void emitOffloadingArraysArgument(
7263     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
7264     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
7265     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
7266   auto &CGM = CGF.CGM;
7267   if (Info.NumberOfPtrs) {
7268     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7269         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7270         Info.BasePointersArray,
7271         /*Idx0=*/0, /*Idx1=*/0);
7272     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7273         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7274         Info.PointersArray,
7275         /*Idx0=*/0,
7276         /*Idx1=*/0);
7277     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7278         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
7279         /*Idx0=*/0, /*Idx1=*/0);
7280     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7281         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
7282         Info.MapTypesArray,
7283         /*Idx0=*/0,
7284         /*Idx1=*/0);
7285   } else {
7286     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7287     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7288     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
7289     MapTypesArrayArg =
7290         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
7291   }
7292 }
7293 
// Emits the code to launch a target region: fill the offloading arrays,
// call the __tgt_target* runtime entry, and fall back to the host version
// (OutlinedFn) if offloading fails or is unavailable. IfCond/Device are the
// expressions of the 'if' and 'device' clauses, if present.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause requires the target region to be wrapped in an outer
  // task so the dependences can be handled by the tasking runtime.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Gather the values captured by the target region; they are both the
  // arguments of the host fallback call and the basis of the device mappings.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen (below) and
  // read by ThenGen, so both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    auto *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
    auto *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value from the offloading call means the region was
    // not (successfully) offloaded, so run the host fallback.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // This code is emitted inside the outer task region, so the captured
      // values must be regenerated in the current function context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Re-capture the variables in the current (task) function context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
  };

  // Build the offloading arrays from the captured variables and the map
  // clauses, then emit ThenGen (possibly wrapped in an outer task).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Per-capture scratch arrays, appended to the aggregate ones above.
    MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
    MappableExprsHandler::MapValuesArrayTy CurPointers;
    MappableExprsHandler::MapValuesArrayTy CurSizes;
    MappableExprsHandler::MapFlagsArrayTy CurMapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);

    // The captures, the captured record fields and CapturedVars are parallel
    // sequences; iterate them in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      CurBasePointers.clear();
      CurPointers.clear();
      CurSizes.clear();
      CurMapTypes.clear();

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
      for (auto L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            isDeclareTargetDeclaration(VD);
        // Only 'declare target link' variables need extra map entries here.
        if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        MEHandler.generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
            Pointers, Sizes, MapTypes, /*IsFirstComponentList=*/true,
            C->isImplicit());
      }
    }

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to ThenGen through the captured-by-reference
    // InputInfo/MapTypesArray.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Emit the host fallback (ElseGen), wrapped in an outer task if needed.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
7567 
7568 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
7569                                                     StringRef ParentName) {
7570   if (!S)
7571     return;
7572 
7573   // Codegen OMP target directives that offload compute to the device.
7574   bool requiresDeviceCodegen =
7575       isa<OMPExecutableDirective>(S) &&
7576       isOpenMPTargetExecutionDirective(
7577           cast<OMPExecutableDirective>(S)->getDirectiveKind());
7578 
7579   if (requiresDeviceCodegen) {
7580     auto &E = *cast<OMPExecutableDirective>(S);
7581     unsigned DeviceID;
7582     unsigned FileID;
7583     unsigned Line;
7584     getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
7585                              FileID, Line);
7586 
7587     // Is this a target region that should not be emitted as an entry point? If
7588     // so just signal we are done with this target region.
7589     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
7590                                                             ParentName, Line))
7591       return;
7592 
7593     switch (S->getStmtClass()) {
7594     case Stmt::OMPTargetDirectiveClass:
7595       CodeGenFunction::EmitOMPTargetDeviceFunction(
7596           CGM, ParentName, cast<OMPTargetDirective>(*S));
7597       break;
7598     case Stmt::OMPTargetParallelDirectiveClass:
7599       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7600           CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
7601       break;
7602     case Stmt::OMPTargetTeamsDirectiveClass:
7603       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7604           CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
7605       break;
7606     case Stmt::OMPTargetTeamsDistributeDirectiveClass:
7607       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7608           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S));
7609       break;
7610     case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
7611       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7612           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S));
7613       break;
7614     case Stmt::OMPTargetParallelForDirectiveClass:
7615       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7616           CGM, ParentName, cast<OMPTargetParallelForDirective>(*S));
7617       break;
7618     case Stmt::OMPTargetParallelForSimdDirectiveClass:
7619       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7620           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S));
7621       break;
7622     case Stmt::OMPTargetSimdDirectiveClass:
7623       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
7624           CGM, ParentName, cast<OMPTargetSimdDirective>(*S));
7625       break;
7626     case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
7627       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7628           CGM, ParentName,
7629           cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
7630       break;
7631     case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
7632       CodeGenFunction::
7633           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7634               CGM, ParentName,
7635               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
7636       break;
7637     default:
7638       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
7639     }
7640     return;
7641   }
7642 
7643   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
7644     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
7645       return;
7646 
7647     scanForTargetRegionsFunctions(
7648         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
7649     return;
7650   }
7651 
7652   // If this is a lambda function, look into its body.
7653   if (auto *L = dyn_cast<LambdaExpr>(S))
7654     S = L->getBody();
7655 
7656   // Keep looking for target regions recursively.
7657   for (auto *II : S->children())
7658     scanForTargetRegionsFunctions(II, ParentName);
7659 }
7660 
7661 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
7662   auto &FD = *cast<FunctionDecl>(GD.getDecl());
7663 
7664   // If emitting code for the host, we do not process FD here. Instead we do
7665   // the normal code generation.
7666   if (!CGM.getLangOpts().OpenMPIsDevice)
7667     return false;
7668 
7669   // Try to detect target regions in the function.
7670   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
7671 
7672   // Do not to emit function if it is not marked as declare target.
7673   return !isDeclareTargetDeclaration(&FD);
7674 }
7675 
7676 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
7677   if (!CGM.getLangOpts().OpenMPIsDevice)
7678     return false;
7679 
7680   // Check if there are Ctors/Dtors in this declaration and look for target
7681   // regions in it. We use the complete variant to produce the kernel name
7682   // mangling.
7683   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
7684   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
7685     for (auto *Ctor : RD->ctors()) {
7686       StringRef ParentName =
7687           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
7688       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
7689     }
7690     auto *Dtor = RD->getDestructor();
7691     if (Dtor) {
7692       StringRef ParentName =
7693           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
7694       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
7695     }
7696   }
7697 
7698   // Do not to emit variable if it is not marked as declare target.
7699   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7700       isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl()));
7701   return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link;
7702 }
7703 
7704 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
7705                                                    llvm::Constant *Addr) {
7706   if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7707           isDeclareTargetDeclaration(VD)) {
7708     OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
7709     StringRef VarName;
7710     CharUnits VarSize;
7711     llvm::GlobalValue::LinkageTypes Linkage;
7712     switch (*Res) {
7713     case OMPDeclareTargetDeclAttr::MT_To:
7714       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
7715       VarName = CGM.getMangledName(VD);
7716       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
7717       Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
7718       break;
7719     case OMPDeclareTargetDeclAttr::MT_Link:
7720       // Map type 'to' because we do not map the original variable but the
7721       // reference.
7722       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
7723       if (!CGM.getLangOpts().OpenMPIsDevice) {
7724         Addr =
7725             cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
7726       }
7727       VarName = Addr->getName();
7728       VarSize = CGM.getPointerSize();
7729       Linkage = llvm::GlobalValue::WeakAnyLinkage;
7730       break;
7731     }
7732     OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
7733         VarName, Addr, VarSize, Flags, Linkage);
7734   }
7735 }
7736 
7737 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
7738   auto *VD = GD.getDecl();
7739   if (isa<FunctionDecl>(VD))
7740     return emitTargetFunctions(GD);
7741 
7742   return emitTargetGlobalVariable(GD);
7743 }
7744 
7745 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
7746     CodeGenModule &CGM)
7747     : CGM(CGM) {
7748   if (CGM.getLangOpts().OpenMPIsDevice) {
7749     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
7750     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
7751   }
7752 }
7753 
7754 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
7755   if (CGM.getLangOpts().OpenMPIsDevice)
7756     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
7757 }
7758 
7759 bool CGOpenMPRuntime::markAsGlobalTarget(const FunctionDecl *D) {
7760   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
7761     return true;
7762 
7763   const FunctionDecl *FD = D->getCanonicalDecl();
7764   // Do not to emit function if it is marked as declare target as it was already
7765   // emitted.
7766   if (isDeclareTargetDeclaration(D)) {
7767     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
7768       if (auto *F = dyn_cast_or_null<llvm::Function>(
7769               CGM.GetGlobalValue(CGM.getMangledName(D))))
7770         return !F->isDeclaration();
7771       return false;
7772     }
7773     return true;
7774   }
7775 
7776   // Do not mark member functions except for static.
7777   if (const auto *Method = dyn_cast<CXXMethodDecl>(FD))
7778     if (!Method->isStatic())
7779       return true;
7780 
7781   return !AlreadyEmittedTargetFunctions.insert(FD).second;
7782 }
7783 
7784 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
7785   // If we have offloading in the current module, we need to emit the entries
7786   // now and register the offloading descriptor.
7787   createOffloadEntriesAndInfoMetadata();
7788 
7789   // Create and register the offloading binary descriptors. This is the main
7790   // entity that captures all the information about offloading in the current
7791   // compilation unit.
7792   return createOffloadingBinaryDescriptorRegistration();
7793 }
7794 
7795 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
7796                                     const OMPExecutableDirective &D,
7797                                     SourceLocation Loc,
7798                                     llvm::Value *OutlinedFn,
7799                                     ArrayRef<llvm::Value *> CapturedVars) {
7800   if (!CGF.HaveInsertPoint())
7801     return;
7802 
7803   auto *RTLoc = emitUpdateLocation(CGF, Loc);
7804   CodeGenFunction::RunCleanupsScope Scope(CGF);
7805 
7806   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
7807   llvm::Value *Args[] = {
7808       RTLoc,
7809       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
7810       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
7811   llvm::SmallVector<llvm::Value *, 16> RealArgs;
7812   RealArgs.append(std::begin(Args), std::end(Args));
7813   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
7814 
7815   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
7816   CGF.EmitRuntimeCall(RTLFn, RealArgs);
7817 }
7818 
7819 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
7820                                          const Expr *NumTeams,
7821                                          const Expr *ThreadLimit,
7822                                          SourceLocation Loc) {
7823   if (!CGF.HaveInsertPoint())
7824     return;
7825 
7826   auto *RTLoc = emitUpdateLocation(CGF, Loc);
7827 
7828   llvm::Value *NumTeamsVal =
7829       (NumTeams)
7830           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
7831                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7832           : CGF.Builder.getInt32(0);
7833 
7834   llvm::Value *ThreadLimitVal =
7835       (ThreadLimit)
7836           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
7837                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7838           : CGF.Builder.getInt32(0);
7839 
7840   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
7841   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
7842                                      ThreadLimitVal};
7843   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
7844                       PushNumTeamsArgs);
7845 }
7846 
// Emits the runtime calls that open (__tgt_target_data_begin) and close
// (__tgt_target_data_end) a 'target data' region, with the region body
// (CodeGen) emitted in between. When device pointer privatization is
// required the body is emitted twice: with privatization inside the 'then'
// branch and without it in the 'else' branch of the 'if' clause conditional.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments. Info is captured by
    // reference so EndThenGen can reuse the same arrays when closing.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; OMP_DEVICEID_UNDEF means "default device".
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // Info must have been filled by BeginThenGen before this runs.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
7973 
// Emit the runtime call implementing the standalone data-mapping directives
// 'target enter data', 'target exit data' and 'target update'.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to emit if there is no valid insertion point (unreachable code).
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled in by TargetThenGen (below) and
  // read by ThenGen; both closures capture them by reference so the values
  // flow from one to the other.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise use the 'undefined' sentinel.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. The nowait clause selects the asynchronous entry point.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a depend clause the data actions become part of a task; otherwise
    // the runtime call is emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An if clause guards the whole emission; the else branch emits nothing.
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
8074 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    // For Linear: the constant step, if one was evaluated; for
    // LinearWithVarStride: the position of the parameter holding the stride
    // (see emitDeclareSimdFunction).
    llvm::APSInt StrideOrArg;
    // Alignment from an 'aligned' clause, or the target's default simd
    // alignment for the parameter type when no explicit value was given.
    llvm::APSInt Alignment;
  };
} // namespace
8085 
8086 static unsigned evaluateCDTSize(const FunctionDecl *FD,
8087                                 ArrayRef<ParamAttrTy> ParamAttrs) {
8088   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
8089   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
8090   // of that clause. The VLEN value must be power of 2.
8091   // In other case the notion of the function`s "characteristic data type" (CDT)
8092   // is used to compute the vector length.
8093   // CDT is defined in the following order:
8094   //   a) For non-void function, the CDT is the return type.
8095   //   b) If the function has any non-uniform, non-linear parameters, then the
8096   //   CDT is the type of the first such parameter.
8097   //   c) If the CDT determined by a) or b) above is struct, union, or class
8098   //   type which is pass-by-value (except for the type that maps to the
8099   //   built-in complex data type), the characteristic data type is int.
8100   //   d) If none of the above three cases is applicable, the CDT is int.
8101   // The VLEN is then determined based on the CDT and the size of vector
8102   // register of that ISA for which current vector version is generated. The
8103   // VLEN is computed using the formula below:
8104   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
8105   // where vector register size specified in section 3.2.1 Registers and the
8106   // Stack Frame of original AMD64 ABI document.
8107   QualType RetType = FD->getReturnType();
8108   if (RetType.isNull())
8109     return 0;
8110   ASTContext &C = FD->getASTContext();
8111   QualType CDT;
8112   if (!RetType.isNull() && !RetType->isVoidType())
8113     CDT = RetType;
8114   else {
8115     unsigned Offset = 0;
8116     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
8117       if (ParamAttrs[Offset].Kind == Vector)
8118         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
8119       ++Offset;
8120     }
8121     if (CDT.isNull()) {
8122       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
8123         if (ParamAttrs[I + Offset].Kind == Vector) {
8124           CDT = FD->getParamDecl(I)->getType();
8125           break;
8126         }
8127       }
8128     }
8129   }
8130   if (CDT.isNull())
8131     CDT = C.IntTy;
8132   CDT = CDT->getCanonicalTypeUnqualified();
8133   if (CDT->isRecordType() || CDT->isUnionType())
8134     CDT = C.IntTy;
8135   return C.getTypeSize(CDT);
8136 }
8137 
8138 static void
8139 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
8140                            const llvm::APSInt &VLENVal,
8141                            ArrayRef<ParamAttrTy> ParamAttrs,
8142                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
8143   struct ISADataTy {
8144     char ISA;
8145     unsigned VecRegSize;
8146   };
8147   ISADataTy ISAData[] = {
8148       {
8149           'b', 128
8150       }, // SSE
8151       {
8152           'c', 256
8153       }, // AVX
8154       {
8155           'd', 256
8156       }, // AVX2
8157       {
8158           'e', 512
8159       }, // AVX512
8160   };
8161   llvm::SmallVector<char, 2> Masked;
8162   switch (State) {
8163   case OMPDeclareSimdDeclAttr::BS_Undefined:
8164     Masked.push_back('N');
8165     Masked.push_back('M');
8166     break;
8167   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
8168     Masked.push_back('N');
8169     break;
8170   case OMPDeclareSimdDeclAttr::BS_Inbranch:
8171     Masked.push_back('M');
8172     break;
8173   }
8174   for (auto Mask : Masked) {
8175     for (auto &Data : ISAData) {
8176       SmallString<256> Buffer;
8177       llvm::raw_svector_ostream Out(Buffer);
8178       Out << "_ZGV" << Data.ISA << Mask;
8179       if (!VLENVal) {
8180         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
8181                                          evaluateCDTSize(FD, ParamAttrs));
8182       } else
8183         Out << VLENVal;
8184       for (auto &ParamAttr : ParamAttrs) {
8185         switch (ParamAttr.Kind){
8186         case LinearWithVarStride:
8187           Out << 's' << ParamAttr.StrideOrArg;
8188           break;
8189         case Linear:
8190           Out << 'l';
8191           if (!!ParamAttr.StrideOrArg)
8192             Out << ParamAttr.StrideOrArg;
8193           break;
8194         case Uniform:
8195           Out << 'u';
8196           break;
8197         case Vector:
8198           Out << 'v';
8199           break;
8200         }
8201         if (!!ParamAttr.Alignment)
8202           Out << 'a' << ParamAttr.Alignment;
8203       }
8204       Out << '_' << Fn->getName();
8205       Fn->addFnAttr(Out.str());
8206     }
8207   }
8208 }
8209 
8210 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
8211                                               llvm::Function *Fn) {
8212   ASTContext &C = CGM.getContext();
8213   FD = FD->getMostRecentDecl();
8214   // Map params to their positions in function decl.
8215   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
8216   if (isa<CXXMethodDecl>(FD))
8217     ParamPositions.insert({FD, 0});
8218   unsigned ParamPos = ParamPositions.size();
8219   for (auto *P : FD->parameters()) {
8220     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
8221     ++ParamPos;
8222   }
8223   while (FD) {
8224     for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
8225       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
8226       // Mark uniform parameters.
8227       for (auto *E : Attr->uniforms()) {
8228         E = E->IgnoreParenImpCasts();
8229         unsigned Pos;
8230         if (isa<CXXThisExpr>(E))
8231           Pos = ParamPositions[FD];
8232         else {
8233           auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8234                           ->getCanonicalDecl();
8235           Pos = ParamPositions[PVD];
8236         }
8237         ParamAttrs[Pos].Kind = Uniform;
8238       }
8239       // Get alignment info.
8240       auto NI = Attr->alignments_begin();
8241       for (auto *E : Attr->aligneds()) {
8242         E = E->IgnoreParenImpCasts();
8243         unsigned Pos;
8244         QualType ParmTy;
8245         if (isa<CXXThisExpr>(E)) {
8246           Pos = ParamPositions[FD];
8247           ParmTy = E->getType();
8248         } else {
8249           auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8250                           ->getCanonicalDecl();
8251           Pos = ParamPositions[PVD];
8252           ParmTy = PVD->getType();
8253         }
8254         ParamAttrs[Pos].Alignment =
8255             (*NI)
8256                 ? (*NI)->EvaluateKnownConstInt(C)
8257                 : llvm::APSInt::getUnsigned(
8258                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
8259                           .getQuantity());
8260         ++NI;
8261       }
8262       // Mark linear parameters.
8263       auto SI = Attr->steps_begin();
8264       auto MI = Attr->modifiers_begin();
8265       for (auto *E : Attr->linears()) {
8266         E = E->IgnoreParenImpCasts();
8267         unsigned Pos;
8268         if (isa<CXXThisExpr>(E))
8269           Pos = ParamPositions[FD];
8270         else {
8271           auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8272                           ->getCanonicalDecl();
8273           Pos = ParamPositions[PVD];
8274         }
8275         auto &ParamAttr = ParamAttrs[Pos];
8276         ParamAttr.Kind = Linear;
8277         if (*SI) {
8278           if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
8279                                     Expr::SE_AllowSideEffects)) {
8280             if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
8281               if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
8282                 ParamAttr.Kind = LinearWithVarStride;
8283                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
8284                     ParamPositions[StridePVD->getCanonicalDecl()]);
8285               }
8286             }
8287           }
8288         }
8289         ++SI;
8290         ++MI;
8291       }
8292       llvm::APSInt VLENVal;
8293       if (const Expr *VLEN = Attr->getSimdlen())
8294         VLENVal = VLEN->EvaluateKnownConstInt(C);
8295       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
8296       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
8297           CGM.getTriple().getArch() == llvm::Triple::x86_64)
8298         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
8299     }
8300     FD = FD->getPreviousDecl();
8301   }
8302 }
8303 
8304 namespace {
8305 /// Cleanup action for doacross support.
8306 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
8307 public:
8308   static const int DoacrossFinArgs = 2;
8309 
8310 private:
8311   llvm::Value *RTLFn;
8312   llvm::Value *Args[DoacrossFinArgs];
8313 
8314 public:
8315   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
8316       : RTLFn(RTLFn) {
8317     assert(CallArgs.size() == DoacrossFinArgs);
8318     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
8319   }
8320   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
8321     if (!CGF.HaveInsertPoint())
8322       return;
8323     CGF.EmitRuntimeCall(RTLFn, Args);
8324   }
8325 };
8326 } // namespace
8327 
// Emit the __kmpc_doacross_init call for a loop with doacross semantics and
// register a cleanup so __kmpc_doacross_fini is emitted on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built lazily once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());

  // Zero-initialize the dims temporary; dims.lower stays 0.
  Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
  // dims.upper = num_iterations;
  LValue UpperLVal =
      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
  llvm::Value *NumIterVal = CGF.EmitScalarConversion(
      CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
      Int64Ty, D.getNumIterations()->getExprLoc());
  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
  // dims.stride = 1;
  LValue StrideLVal =
      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                        StrideLVal);

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  // NOTE(review): num_dims is hard-coded to 1 — only a single (collapsed)
  // dimension is ever passed to the runtime here.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
                         getThreadID(CGF, D.getLocStart()),
                         llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             DimsAddr.getPointer(), CGM.VoidPtrTy)};

  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a normal-and-EH cleanup that emits the matching
  // __kmpc_doacross_fini call when the scope is left.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
8386 
8387 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
8388                                           const OMPDependClause *C) {
8389   QualType Int64Ty =
8390       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8391   const Expr *CounterVal = C->getCounterValue();
8392   assert(CounterVal);
8393   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
8394                                                  CounterVal->getType(), Int64Ty,
8395                                                  CounterVal->getExprLoc());
8396   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
8397   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
8398   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
8399                          getThreadID(CGF, C->getLocStart()),
8400                          CntAddr.getPointer()};
8401   llvm::Value *RTLFn;
8402   if (C->getDependencyKind() == OMPC_DEPEND_source)
8403     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
8404   else {
8405     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
8406     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
8407   }
8408   CGF.EmitRuntimeCall(RTLFn, Args);
8409 }
8410 
8411 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
8412                                llvm::Value *Callee,
8413                                ArrayRef<llvm::Value *> Args) const {
8414   assert(Loc.isValid() && "Outlined function call location must be valid.");
8415   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
8416 
8417   if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
8418     if (Fn->doesNotThrow()) {
8419       CGF.EmitNounwindRuntimeCall(Fn, Args);
8420       return;
8421     }
8422   }
8423   CGF.EmitRuntimeCall(Callee, Args);
8424 }
8425 
// Emit a call to an outlined OpenMP region function; delegates to emitCall,
// which attaches an artificial debug location and uses a nounwind call when
// possible.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
8431 
// Default implementation: no parameter translation is performed, so the
// address of the native parameter's local copy is returned directly and
// TargetParam is ignored.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
8437 
// Default implementation: provides no runtime-specific address for local
// variables — the returned invalid Address indicates nothing special was
// allocated here.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  return Address::invalid();
}
8442 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: the following entry points are not supported in
// SIMD-only mode; each of them aborts via llvm_unreachable if ever reached.
//===----------------------------------------------------------------------===//

llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Value *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8609 
// In SIMD-only mode only the simple reduction form is expected (enforced by
// the assert); it is handled by the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
8618 
// The following CGOpenMPSIMDRuntime entry points are not supported in
// SIMD-only mode; each aborts via llvm_unreachable if ever reached.

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Value *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond, const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8678 
// SIMD-only mode performs no target-specific processing of globals; always
// returns false.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
8682 
// No offload registration function is created in SIMD-only mode; returns
// nullptr.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
8686 
// The remaining CGOpenMPSIMDRuntime entry points are not supported in
// SIMD-only mode; each aborts via llvm_unreachable if ever reached.

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Value *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
8736 
8737