1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitmaskEnum.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/CallSite.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/Value.h"
28 #include "llvm/Support/Format.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 
35 namespace {
36 /// \brief Base class for handling code generation inside OpenMP regions.
37 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
38 public:
39   /// \brief Kinds of OpenMP regions used in codegen.
40   enum CGOpenMPRegionKind {
41     /// \brief Region with outlined function for standalone 'parallel'
42     /// directive.
43     ParallelOutlinedRegion,
44     /// \brief Region with outlined function for standalone 'task' directive.
45     TaskOutlinedRegion,
46     /// \brief Region for constructs that do not require function outlining,
47     /// like 'for', 'sections', 'atomic' etc. directives.
48     InlinedRegion,
49     /// \brief Region with outlined function for standalone 'target' directive.
50     TargetRegion,
51   };
52 
53   CGOpenMPRegionInfo(const CapturedStmt &CS,
54                      const CGOpenMPRegionKind RegionKind,
55                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
56                      bool HasCancel)
57       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
58         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
59 
60   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
61                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
62                      bool HasCancel)
63       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
64         Kind(Kind), HasCancel(HasCancel) {}
65 
66   /// \brief Get a variable or parameter for storing global thread id
67   /// inside OpenMP construct.
68   virtual const VarDecl *getThreadIDVariable() const = 0;
69 
70   /// \brief Emit the captured statement body.
71   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
72 
73   /// \brief Get an LValue for the current ThreadID variable.
74   /// \return LValue for thread id variable. This LValue always has type int32*.
75   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
76 
77   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
78 
79   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
80 
81   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
82 
83   bool hasCancel() const { return HasCancel; }
84 
85   static bool classof(const CGCapturedStmtInfo *Info) {
86     return Info->getKind() == CR_OpenMP;
87   }
88 
89   ~CGOpenMPRegionInfo() override = default;
90 
91 protected:
92   CGOpenMPRegionKind RegionKind;
93   RegionCodeGenTy CodeGen;
94   OpenMPDirectiveKind Kind;
95   bool HasCancel;
96 };
97 
98 /// \brief API for captured statement code generation in OpenMP constructs.
99 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
100 public:
101   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
102                              const RegionCodeGenTy &CodeGen,
103                              OpenMPDirectiveKind Kind, bool HasCancel,
104                              StringRef HelperName)
105       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
106                            HasCancel),
107         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
108     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
109   }
110 
111   /// \brief Get a variable or parameter for storing global thread id
112   /// inside OpenMP construct.
113   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
114 
115   /// \brief Get the name of the capture helper.
116   StringRef getHelperName() const override { return HelperName; }
117 
118   static bool classof(const CGCapturedStmtInfo *Info) {
119     return CGOpenMPRegionInfo::classof(Info) &&
120            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
121                ParallelOutlinedRegion;
122   }
123 
124 private:
125   /// \brief A variable or parameter storing global thread id for OpenMP
126   /// constructs.
127   const VarDecl *ThreadIDVar;
128   StringRef HelperName;
129 };
130 
131 /// \brief API for captured statement code generation in OpenMP constructs.
132 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
133 public:
134   class UntiedTaskActionTy final : public PrePostActionTy {
135     bool Untied;
136     const VarDecl *PartIDVar;
137     const RegionCodeGenTy UntiedCodeGen;
138     llvm::SwitchInst *UntiedSwitch = nullptr;
139 
140   public:
141     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
142                        const RegionCodeGenTy &UntiedCodeGen)
143         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
144     void Enter(CodeGenFunction &CGF) override {
145       if (Untied) {
146         // Emit task switching point.
147         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
148             CGF.GetAddrOfLocalVar(PartIDVar),
149             PartIDVar->getType()->castAs<PointerType>());
150         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
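  // Illustrative sketch of the scheme implemented above (not the exact code
  // clang emits): the body of an untied task is split at task scheduling
  // points, and the part id stored in the task descriptor selects where to
  // resume, roughly
  //
  //   switch (*part_id) {
  //   case 0: /* first part  */ *part_id = 1; return; // re-enqueued later
  //   case 1: /* second part */ *part_id = 2; return;
  //   // ...
  //   }
  //
  // Each call to emitUntiedSwitch() adds one such resume point as a new case
  // of UntiedSwitch.
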
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// \brief Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// \brief Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// \brief Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// \brief A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// \brief API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
  /// \brief Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// \brief Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in the list of captured variables; we can use the original
    // one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// \brief Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// \brief Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// \brief Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// \brief CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
/// \brief API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// \brief This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// \brief Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332   llvm_unreachable("No codegen for expressions");
333 }
/// \brief API for generation of expressions captured in an innermost OpenMP
/// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
359         return CGF.EmitLValue(&DRE).getAddress();
360       });
361     }
362     (void)PrivScope.Privatize();
363   }
364 
365   /// \brief Lookup the captured field decl for a variable.
366   const FieldDecl *lookup(const VarDecl *VD) const override {
367     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
368       return FD;
369     return nullptr;
370   }
371 
372   /// \brief Emit the captured statement body.
373   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
374     llvm_unreachable("No body for expressions");
375   }
376 
377   /// \brief Get a variable or parameter for storing global thread id
378   /// inside OpenMP construct.
379   const VarDecl *getThreadIDVariable() const override {
380     llvm_unreachable("No thread id for expressions");
381   }
382 
383   /// \brief Get the name of the capture helper.
384   StringRef getHelperName() const override {
385     llvm_unreachable("No helper name for expressions");
386   }
387 
388   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
389 
390 private:
391   /// Private scope to capture global variables.
392   CodeGenFunction::OMPPrivateScope PrivScope;
393 };
394 
395 /// \brief RAII for emitting code of OpenMP constructs.
396 class InlinedOpenMPRegionRAII {
397   CodeGenFunction &CGF;
398   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
399   FieldDecl *LambdaThisCaptureField = nullptr;
400   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
401 
402 public:
403   /// \brief Constructs region for combined constructs.
404   /// \param CodeGen Code generation sequence for combined directives. Includes
405   /// a list of functions used for code generation of implicitly inlined
406   /// regions.
407   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
408                           OpenMPDirectiveKind Kind, bool HasCancel)
409       : CGF(CGF) {
410     // Start emission for the construct.
411     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
412         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
413     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
414     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
415     CGF.LambdaThisCaptureField = nullptr;
416     BlockInfo = CGF.BlockInfo;
417     CGF.BlockInfo = nullptr;
418   }
419 
420   ~InlinedOpenMPRegionRAII() {
421     // Restore original CapturedStmtInfo only if we're done with code emission.
422     auto *OldCSI =
423         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
424     delete CGF.CapturedStmtInfo;
425     CGF.CapturedStmtInfo = OldCSI;
426     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
427     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
428     CGF.BlockInfo = BlockInfo;
429   }
430 };
431 
432 /// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
434 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
435 enum OpenMPLocationFlags : unsigned {
436   /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMB = 0x01,
438   /// \brief Use c-style ident structure.
439   OMP_IDENT_KMPC = 0x02,
440   /// \brief Atomic reduction option for kmpc_reduce.
441   OMP_ATOMIC_REDUCE = 0x10,
442   /// \brief Explicit 'barrier' directive.
443   OMP_IDENT_BARRIER_EXPL = 0x20,
444   /// \brief Implicit barrier in code.
445   OMP_IDENT_BARRIER_IMPL = 0x40,
446   /// \brief Implicit barrier in 'for' directive.
447   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
448   /// \brief Implicit barrier in 'sections' directive.
449   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
450   /// \brief Implicit barrier in 'single' directive.
451   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
452   /// Call of __kmp_for_static_init for static loop.
453   OMP_IDENT_WORK_LOOP = 0x200,
454   /// Call of __kmp_for_static_init for sections.
455   OMP_IDENT_WORK_SECTIONS = 0x400,
456   /// Call of __kmp_for_static_init for distribute.
457   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
458   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
459 };
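
// Illustrative note (an assumption about typical usage rather than a
// normative list): these flags are OR-ed together in the 'flags' field of the
// emitted ident_t. For example, the location passed to __kmpc_for_static_init
// for a worksharing loop would carry OMP_IDENT_KMPC | OMP_IDENT_WORK_LOOP,
// while the implicit barrier ending a 'single' region would carry
// OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_SINGLE.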
460 
/// \brief Describes the ident_t structure that carries a source location.
/// All descriptions are taken from
463 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
464 /// Original structure:
465 /// typedef struct ident {
466 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
467 ///                                  see above  */
468 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
469 ///                                  KMP_IDENT_KMPC identifies this union
470 ///                                  member  */
471 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
472 ///                                  see above */
473 ///#if USE_ITT_BUILD
474 ///                            /*  but currently used for storing
475 ///                                region-specific ITT */
476 ///                            /*  contextual information. */
477 ///#endif /* USE_ITT_BUILD */
478 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
479 ///                                 C++  */
480 ///    char const *psource;    /**< String describing the source location.
481 ///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
483 ///                            the function and a pair of line numbers that
484 ///                            delimit the construct.
485 ///                             */
486 /// } ident_t;
487 enum IdentFieldIndex {
488   /// \brief might be used in Fortran
489   IdentField_Reserved_1,
490   /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
491   IdentField_Flags,
492   /// \brief Not really used in Fortran any more
493   IdentField_Reserved_2,
494   /// \brief Source[4] in Fortran, do not use for C++
495   IdentField_Reserved_3,
496   /// \brief String describing the source location. The string is composed of
497   /// semi-colon separated fields which describe the source file, the function
498   /// and a pair of line numbers that delimit the construct.
499   IdentField_PSource
500 };
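
// For reference, psource is a semi-colon separated string of the form
//   ";file;function;begin_line;end_line;;"
// with ";unknown;unknown;0;0;;" used when no usable source location is
// available (a sketch of the convention documented in kmp.h; the string is
// produced elsewhere in this file).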
501 
502 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
503 /// the enum sched_type in kmp.h).
504 enum OpenMPSchedType {
505   /// \brief Lower bound for default (unordered) versions.
506   OMP_sch_lower = 32,
507   OMP_sch_static_chunked = 33,
508   OMP_sch_static = 34,
509   OMP_sch_dynamic_chunked = 35,
510   OMP_sch_guided_chunked = 36,
511   OMP_sch_runtime = 37,
512   OMP_sch_auto = 38,
513   /// static with chunk adjustment (e.g., simd)
514   OMP_sch_static_balanced_chunked = 45,
515   /// \brief Lower bound for 'ordered' versions.
516   OMP_ord_lower = 64,
517   OMP_ord_static_chunked = 65,
518   OMP_ord_static = 66,
519   OMP_ord_dynamic_chunked = 67,
520   OMP_ord_guided_chunked = 68,
521   OMP_ord_runtime = 69,
522   OMP_ord_auto = 70,
523   OMP_sch_default = OMP_sch_static,
524   /// \brief dist_schedule types
525   OMP_dist_sch_static_chunked = 91,
526   OMP_dist_sch_static = 92,
527   /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
528   /// Set if the monotonic schedule modifier was present.
529   OMP_sch_modifier_monotonic = (1 << 29),
530   /// Set if the nonmonotonic schedule modifier was present.
531   OMP_sch_modifier_nonmonotonic = (1 << 30),
532 };
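
// A rough mapping from 'schedule' clauses to the enumerators above (an
// illustrative sketch; the authoritative selection logic lives later in this
// file):
//   #pragma omp for schedule(static)                -> OMP_sch_static
//   #pragma omp for schedule(static, 4)             -> OMP_sch_static_chunked
//   #pragma omp for ordered schedule(dynamic)       -> OMP_ord_dynamic_chunked
//   #pragma omp for schedule(nonmonotonic: dynamic) -> OMP_sch_dynamic_chunked
//                                                      | OMP_sch_modifier_nonmonotonic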
533 
534 enum OpenMPRTLFunction {
535   /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
536   /// kmpc_micro microtask, ...);
537   OMPRTL__kmpc_fork_call,
538   /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
539   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
540   OMPRTL__kmpc_threadprivate_cached,
541   /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
542   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
543   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
545   OMPRTL__kmpc_global_thread_num,
546   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
547   // kmp_critical_name *crit);
548   OMPRTL__kmpc_critical,
549   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
550   // global_tid, kmp_critical_name *crit, uintptr_t hint);
551   OMPRTL__kmpc_critical_with_hint,
552   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
553   // kmp_critical_name *crit);
554   OMPRTL__kmpc_end_critical,
555   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
556   // global_tid);
557   OMPRTL__kmpc_cancel_barrier,
558   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
559   OMPRTL__kmpc_barrier,
560   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
561   OMPRTL__kmpc_for_static_fini,
562   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
563   // global_tid);
564   OMPRTL__kmpc_serialized_parallel,
565   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
566   // global_tid);
567   OMPRTL__kmpc_end_serialized_parallel,
568   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
569   // kmp_int32 num_threads);
570   OMPRTL__kmpc_push_num_threads,
571   // Call to void __kmpc_flush(ident_t *loc);
572   OMPRTL__kmpc_flush,
573   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
574   OMPRTL__kmpc_master,
575   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
576   OMPRTL__kmpc_end_master,
577   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
578   // int end_part);
579   OMPRTL__kmpc_omp_taskyield,
580   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
581   OMPRTL__kmpc_single,
582   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
583   OMPRTL__kmpc_end_single,
584   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
585   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
586   // kmp_routine_entry_t *task_entry);
587   OMPRTL__kmpc_omp_task_alloc,
588   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
589   // new_task);
590   OMPRTL__kmpc_omp_task,
591   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
592   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
593   // kmp_int32 didit);
594   OMPRTL__kmpc_copyprivate,
595   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
596   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
597   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
598   OMPRTL__kmpc_reduce,
599   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
600   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
601   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
602   // *lck);
603   OMPRTL__kmpc_reduce_nowait,
604   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
605   // kmp_critical_name *lck);
606   OMPRTL__kmpc_end_reduce,
607   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
608   // kmp_critical_name *lck);
609   OMPRTL__kmpc_end_reduce_nowait,
610   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
611   // kmp_task_t * new_task);
612   OMPRTL__kmpc_omp_task_begin_if0,
613   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
614   // kmp_task_t * new_task);
615   OMPRTL__kmpc_omp_task_complete_if0,
616   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
617   OMPRTL__kmpc_ordered,
618   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
619   OMPRTL__kmpc_end_ordered,
620   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
621   // global_tid);
622   OMPRTL__kmpc_omp_taskwait,
623   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
624   OMPRTL__kmpc_taskgroup,
625   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
626   OMPRTL__kmpc_end_taskgroup,
627   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
628   // int proc_bind);
629   OMPRTL__kmpc_push_proc_bind,
630   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
631   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
632   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
633   OMPRTL__kmpc_omp_task_with_deps,
634   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
635   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
636   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
637   OMPRTL__kmpc_omp_wait_deps,
638   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
639   // global_tid, kmp_int32 cncl_kind);
640   OMPRTL__kmpc_cancellationpoint,
641   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
642   // kmp_int32 cncl_kind);
643   OMPRTL__kmpc_cancel,
644   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
645   // kmp_int32 num_teams, kmp_int32 thread_limit);
646   OMPRTL__kmpc_push_num_teams,
647   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
648   // microtask, ...);
649   OMPRTL__kmpc_fork_teams,
650   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
651   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
652   // sched, kmp_uint64 grainsize, void *task_dup);
653   OMPRTL__kmpc_taskloop,
654   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
655   // num_dims, struct kmp_dim *dims);
656   OMPRTL__kmpc_doacross_init,
657   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
658   OMPRTL__kmpc_doacross_fini,
659   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
660   // *vec);
661   OMPRTL__kmpc_doacross_post,
662   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
663   // *vec);
664   OMPRTL__kmpc_doacross_wait,
665   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
666   // *data);
667   OMPRTL__kmpc_task_reduction_init,
668   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
669   // *d);
670   OMPRTL__kmpc_task_reduction_get_th_data,
671 
672   //
673   // Offloading related calls
674   //
675   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
676   // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
677   // *arg_types);
678   OMPRTL__tgt_target,
679   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
680   // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
681   // *arg_types);
682   OMPRTL__tgt_target_nowait,
683   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
684   // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
685   // *arg_types, int32_t num_teams, int32_t thread_limit);
686   OMPRTL__tgt_target_teams,
687   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
688   // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
689   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
690   OMPRTL__tgt_target_teams_nowait,
691   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
692   OMPRTL__tgt_register_lib,
693   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
694   OMPRTL__tgt_unregister_lib,
695   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
696   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
697   OMPRTL__tgt_target_data_begin,
698   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
699   // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
700   // *arg_types);
701   OMPRTL__tgt_target_data_begin_nowait,
702   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
703   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
704   OMPRTL__tgt_target_data_end,
705   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
706   // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
707   // *arg_types);
708   OMPRTL__tgt_target_data_end_nowait,
709   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
710   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
711   OMPRTL__tgt_target_data_update,
712   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
713   // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target_data_update_nowait,
716 };
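
// As an illustration of how these entry points are used together (a
// conceptual sketch, not the exact IR clang generates), a directive such as
//
//   #pragma omp parallel num_threads(4)
//   { body(); }
//
// is lowered roughly to
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, /*argc=*/0, .omp_outlined.);
//
// where .omp_outlined. is the outlined parallel region receiving the global
// and bound thread ids, and an 'if' clause that evaluates to false falls back
// to the __kmpc_serialized_parallel / __kmpc_end_serialized_parallel pair.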
717 
/// A basic class for pre- and post-actions used in the advanced codegen
/// sequence for an OpenMP region.
720 class CleanupTy final : public EHScopeStack::Cleanup {
721   PrePostActionTy *Action;
722 
723 public:
724   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
725   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
726     if (!CGF.HaveInsertPoint())
727       return;
728     Action->Exit(CGF);
729   }
730 };
731 
732 } // anonymous namespace
733 
734 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
735   CodeGenFunction::RunCleanupsScope Scope(CGF);
736   if (PrePostAction) {
737     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
738     Callback(CodeGen, CGF, *PrePostAction);
739   } else {
740     PrePostActionTy Action;
741     Callback(CodeGen, CGF, Action);
742   }
743 }
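
// Typical usage of RegionCodeGenTy (a sketch): a caller wraps statement
// emission in a callable and optionally attaches a PrePostActionTy whose
// Enter/Exit hooks bracket the region, e.g.
//
//   RegionCodeGenTy RCG([](CodeGenFunction &CGF, PrePostActionTy &Action) {
//     Action.Enter(CGF);
//     // ... emit the region body ...
//   });
//   RCG(CGF);
//
// The cleanup pushed above guarantees that the action's Exit() hook runs even
// when the region is left through an exceptional cleanup path.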
744 
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
747 static const OMPDeclareReductionDecl *
748 getReductionInit(const Expr *ReductionOp) {
749   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
750     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
751       if (auto *DRE =
752               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
753         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
754           return DRD;
755   return nullptr;
756 }
757 
758 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
759                                              const OMPDeclareReductionDecl *DRD,
760                                              const Expr *InitOp,
761                                              Address Private, Address Original,
762                                              QualType Ty) {
763   if (DRD->getInitializer()) {
764     std::pair<llvm::Function *, llvm::Function *> Reduction =
765         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
766     auto *CE = cast<CallExpr>(InitOp);
767     auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
768     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
769     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
770     auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
771     auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
772     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
773     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
774                             [=]() -> Address { return Private; });
775     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
776                             [=]() -> Address { return Original; });
777     (void)PrivateScope.Privatize();
778     RValue Func = RValue::get(Reduction.second);
779     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
780     CGF.EmitIgnoredExpr(InitOp);
781   } else {
782     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
783     auto *GV = new llvm::GlobalVariable(
784         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
785         llvm::GlobalValue::PrivateLinkage, Init, ".init");
786     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
787     RValue InitRVal;
788     switch (CGF.getEvaluationKind(Ty)) {
789     case TEK_Scalar:
790       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
791       break;
792     case TEK_Complex:
793       InitRVal =
794           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
795       break;
796     case TEK_Aggregate:
797       InitRVal = RValue::getAggregate(LV.getAddress());
798       break;
799     }
800     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
801     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
802     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
803                          /*IsInitializer=*/false);
804   }
805 }
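
// Example of what this handles (illustrative only): given
//
//   #pragma omp declare reduction(myadd : double : omp_out += omp_in)
//       initializer(omp_priv = 0.0)
//
// the 'initializer' clause has been emitted as a separate '.omp_initializer.'
// function; it is invoked here through the opaque callee with omp_priv and
// omp_orig mapped to the Private and Original addresses. Without an
// initializer clause, the else-branch default-initializes the private copy
// from an emitted null constant.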
806 
807 /// \brief Emit initialization of arrays of complex types.
808 /// \param DestAddr Address of the array.
809 /// \param Type Type of array.
810 /// \param Init Initial expression of array.
811 /// \param SrcAddr Address of the original array.
812 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
813                                  QualType Type, bool EmitDeclareReductionInit,
814                                  const Expr *Init,
815                                  const OMPDeclareReductionDecl *DRD,
816                                  Address SrcAddr = Address::invalid()) {
817   // Perform element-by-element initialization.
818   QualType ElementTy;
819 
820   // Drill down to the base element type on both arrays.
821   auto ArrayTy = Type->getAsArrayTypeUnsafe();
822   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
823   DestAddr =
824       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
825   if (DRD)
826     SrcAddr =
827         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
828 
829   llvm::Value *SrcBegin = nullptr;
830   if (DRD)
831     SrcBegin = SrcAddr.getPointer();
832   auto DestBegin = DestAddr.getPointer();
833   // Cast from pointer to array type to pointer to single element.
834   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
835   // The basic structure here is a while-do loop.
836   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
837   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
838   auto IsEmpty =
839       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
840   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
841 
842   // Enter the loop body, making that address the current address.
843   auto EntryBB = CGF.Builder.GetInsertBlock();
844   CGF.EmitBlock(BodyBB);
845 
846   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
847 
848   llvm::PHINode *SrcElementPHI = nullptr;
849   Address SrcElementCurrent = Address::invalid();
850   if (DRD) {
851     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
852                                           "omp.arraycpy.srcElementPast");
853     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
854     SrcElementCurrent =
855         Address(SrcElementPHI,
856                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
857   }
858   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
859       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
860   DestElementPHI->addIncoming(DestBegin, EntryBB);
861   Address DestElementCurrent =
862       Address(DestElementPHI,
863               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
864 
865   // Emit copy.
866   {
867     CodeGenFunction::RunCleanupsScope InitScope(CGF);
868     if (EmitDeclareReductionInit) {
869       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
870                                        SrcElementCurrent, ElementTy);
871     } else
872       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
873                            /*IsInitializer=*/false);
874   }
875 
876   if (DRD) {
877     // Shift the address forward by one element.
878     auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
879         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
880     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
881   }
882 
883   // Shift the address forward by one element.
884   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
885       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
886   // Check whether we've reached the end.
887   auto Done =
888       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
889   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
890   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
891 
892   // Done.
893   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
894 }
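
// Schematically, the control flow produced above is (block names as created
// by createBasicBlock; a sketch rather than a literal IR dump):
//
//   entry:                br i1 %isempty, %omp.arrayinit.done,
//                                         %omp.arrayinit.body
//   omp.arrayinit.body:   %dest = phi [ %begin, %entry ],
//                                      [ %next, %omp.arrayinit.body ]
//                         ... initialize one element ...
//                         br i1 %done, %omp.arrayinit.done,
//                                      %omp.arrayinit.body
//   omp.arrayinit.done: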
895 
896 static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
897 isDeclareTargetDeclaration(const ValueDecl *VD) {
898   for (const auto *D : VD->redecls()) {
899     if (!D->hasAttrs())
900       continue;
901     if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
902       return Attr->getMapType();
903   }
904   return llvm::None;
905 }
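
// Example of what this inspects (illustrative): for
//
//   #pragma omp declare target
//   int DeviceCounter;
//   #pragma omp end declare target
//
// the attribute lookup yields OMPDeclareTargetDeclAttr::MT_To for
// 'DeviceCounter', a variable listed in 'declare target link(...)' yields
// MT_Link, and declarations without the attribute produce llvm::None.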
906 
907 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
908   return CGF.EmitOMPSharedLValue(E);
909 }
910 
911 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
912                                             const Expr *E) {
913   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
914     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
915   return LValue();
916 }
917 
918 void ReductionCodeGen::emitAggregateInitialization(
919     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
920     const OMPDeclareReductionDecl *DRD) {
921   // Emit VarDecl with copy init for arrays.
922   // Get the address of the original variable captured in current
923   // captured region.
924   auto *PrivateVD =
925       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
926   bool EmitDeclareReductionInit =
927       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
928   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
929                        EmitDeclareReductionInit,
930                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
931                                                 : PrivateVD->getInit(),
932                        DRD, SharedLVal.getAddress());
933 }
934 
935 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
936                                    ArrayRef<const Expr *> Privates,
937                                    ArrayRef<const Expr *> ReductionOps) {
938   ClausesData.reserve(Shareds.size());
939   SharedAddresses.reserve(Shareds.size());
940   Sizes.reserve(Shareds.size());
941   BaseDecls.reserve(Shareds.size());
942   auto IPriv = Privates.begin();
943   auto IRed = ReductionOps.begin();
944   for (const auto *Ref : Shareds) {
945     ClausesData.emplace_back(Ref, *IPriv, *IRed);
946     std::advance(IPriv, 1);
947     std::advance(IRed, 1);
948   }
949 }
950 
951 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
952   assert(SharedAddresses.size() == N &&
953          "Number of generated lvalues must be exactly N.");
954   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
955   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
956   SharedAddresses.emplace_back(First, Second);
957 }
958 
959 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
960   auto *PrivateVD =
961       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
962   QualType PrivateType = PrivateVD->getType();
963   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
964   if (!PrivateType->isVariablyModifiedType()) {
965     Sizes.emplace_back(
966         CGF.getTypeSize(
967             SharedAddresses[N].first.getType().getNonReferenceType()),
968         nullptr);
969     return;
970   }
971   llvm::Value *Size;
972   llvm::Value *SizeInChars;
973   llvm::Type *ElemType =
974       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
975           ->getElementType();
976   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
977   if (AsArraySection) {
978     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
979                                      SharedAddresses[N].first.getPointer());
980     Size = CGF.Builder.CreateNUWAdd(
981         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
982     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
983   } else {
984     SizeInChars = CGF.getTypeSize(
985         SharedAddresses[N].first.getType().getNonReferenceType());
986     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
987   }
988   Sizes.emplace_back(SizeInChars, Size);
989   CodeGenFunction::OpaqueValueMapping OpaqueMap(
990       CGF,
991       cast<OpaqueValueExpr>(
992           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
993       RValue::get(Size));
994   CGF.EmitVariablyModifiedType(PrivateType);
995 }
996 
997 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
998                                          llvm::Value *Size) {
999   auto *PrivateVD =
1000       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1001   QualType PrivateType = PrivateVD->getType();
1002   if (!PrivateType->isVariablyModifiedType()) {
1003     assert(!Size && !Sizes[N].second &&
1004            "Size should be nullptr for non-variably modified reduction "
1005            "items.");
1006     return;
1007   }
1008   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1009       CGF,
1010       cast<OpaqueValueExpr>(
1011           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1012       RValue::get(Size));
1013   CGF.EmitVariablyModifiedType(PrivateType);
1014 }
1015 
1016 void ReductionCodeGen::emitInitialization(
1017     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1018     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1019   assert(SharedAddresses.size() > N && "No variable was generated");
1020   auto *PrivateVD =
1021       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1022   auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1023   QualType PrivateType = PrivateVD->getType();
1024   PrivateAddr = CGF.Builder.CreateElementBitCast(
1025       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1026   QualType SharedType = SharedAddresses[N].first.getType();
1027   SharedLVal = CGF.MakeAddrLValue(
1028       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1029                                        CGF.ConvertTypeForMem(SharedType)),
1030       SharedType, SharedAddresses[N].first.getBaseInfo(),
1031       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1032   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1033     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1034   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1035     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1036                                      PrivateAddr, SharedLVal.getAddress(),
1037                                      SharedLVal.getType());
1038   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1039              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1040     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1041                          PrivateVD->getType().getQualifiers(),
1042                          /*IsInitializer=*/false);
1043   }
1044 }
1045 
1046 bool ReductionCodeGen::needCleanups(unsigned N) {
1047   auto *PrivateVD =
1048       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1049   QualType PrivateType = PrivateVD->getType();
1050   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1051   return DTorKind != QualType::DK_none;
1052 }
1053 
1054 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1055                                     Address PrivateAddr) {
1056   auto *PrivateVD =
1057       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1058   QualType PrivateType = PrivateVD->getType();
1059   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1060   if (needCleanups(N)) {
1061     PrivateAddr = CGF.Builder.CreateElementBitCast(
1062         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1063     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1064   }
1065 }
1066 
1067 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1068                           LValue BaseLV) {
1069   BaseTy = BaseTy.getNonReferenceType();
1070   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1071          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1072     if (auto *PtrTy = BaseTy->getAs<PointerType>())
1073       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1074     else {
1075       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1076       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1077     }
1078     BaseTy = BaseTy->getPointeeType();
1079   }
1080   return CGF.MakeAddrLValue(
1081       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1082                                        CGF.ConvertTypeForMem(ElTy)),
1083       BaseLV.getType(), BaseLV.getBaseInfo(),
1084       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1085 }
1086 
1087 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1088                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1089                           llvm::Value *Addr) {
1090   Address Tmp = Address::invalid();
1091   Address TopTmp = Address::invalid();
1092   Address MostTopTmp = Address::invalid();
1093   BaseTy = BaseTy.getNonReferenceType();
1094   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1095          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1096     Tmp = CGF.CreateMemTemp(BaseTy);
1097     if (TopTmp.isValid())
1098       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1099     else
1100       MostTopTmp = Tmp;
1101     TopTmp = Tmp;
1102     BaseTy = BaseTy->getPointeeType();
1103   }
1104   llvm::Type *Ty = BaseLVType;
1105   if (Tmp.isValid())
1106     Ty = Tmp.getElementType();
1107   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1108   if (Tmp.isValid()) {
1109     CGF.Builder.CreateStore(Addr, Tmp);
1110     return MostTopTmp;
1111   }
1112   return Address(Addr, BaseLVAlignment);
1113 }
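
// Illustrative scenario for loadToBegin()/castToBase() (a sketch): for
//
//   int *P;
//   #pragma omp parallel reduction(+ : P[2:10])
//
// the reduction item is an array section reached through a pointer, so
// loadToBegin() walks the pointer/reference levels of the base type of 'P'
// down to the element type, and castToBase() rebuilds the same levels of
// indirection around the adjusted private pointer (materializing a temporary
// per level) so the private copy can still be accessed through the original
// base expression.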
1114 
1115 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1116   const VarDecl *OrigVD = nullptr;
1117   if (auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1118     auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1119     while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1120       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1121     while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1122       Base = TempASE->getBase()->IgnoreParenImpCasts();
1123     DE = cast<DeclRefExpr>(Base);
1124     OrigVD = cast<VarDecl>(DE->getDecl());
1125   } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1126     auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1127     while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1128       Base = TempASE->getBase()->IgnoreParenImpCasts();
1129     DE = cast<DeclRefExpr>(Base);
1130     OrigVD = cast<VarDecl>(DE->getDecl());
1131   }
1132   return OrigVD;
1133 }
1134 
1135 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1136                                                Address PrivateAddr) {
1137   const DeclRefExpr *DE;
1138   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1139     BaseDecls.emplace_back(OrigVD);
1140     auto OriginalBaseLValue = CGF.EmitLValue(DE);
1141     LValue BaseLValue =
1142         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1143                     OriginalBaseLValue);
1144     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1145         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1146     llvm::Value *PrivatePointer =
1147         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1148             PrivateAddr.getPointer(),
1149             SharedAddresses[N].first.getAddress().getType());
1150     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1151     return castToBase(CGF, OrigVD->getType(),
1152                       SharedAddresses[N].first.getType(),
1153                       OriginalBaseLValue.getAddress().getType(),
1154                       OriginalBaseLValue.getAlignment(), Ptr);
1155   }
1156   BaseDecls.emplace_back(
1157       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1158   return PrivateAddr;
1159 }
1160 
1161 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1162   auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1163   return DRD && DRD->getInitializer();
1164 }
1165 
1166 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1167   return CGF.EmitLoadOfPointerLValue(
1168       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1169       getThreadIDVariable()->getType()->castAs<PointerType>());
1170 }
1171 
1172 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1173   if (!CGF.HaveInsertPoint())
1174     return;
1175   // 1.2.2 OpenMP Language Terminology
1176   // Structured block - An executable statement with a single entry at the
1177   // top and a single exit at the bottom.
1178   // The point of exit cannot be a branch out of the structured block.
1179   // longjmp() and throw() must not violate the entry/exit criteria.
1180   CGF.EHStack.pushTerminate();
1181   CodeGen(CGF);
1182   CGF.EHStack.popTerminate();
1183 }
1184 
1185 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1186     CodeGenFunction &CGF) {
1187   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1188                             getThreadIDVariable()->getType(),
1189                             AlignmentSource::Decl);
1190 }
1191 
1192 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1193     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
1194   IdentTy = llvm::StructType::create(
1195       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
1196       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
1197       CGM.Int8PtrTy /* psource */);
1198   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1199 
1200   loadOffloadInfoMetadata();
1201 }
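
// For reference, the types built above correspond to the LLVM IR
//
//   %ident_t = type { i32, i32, i32, i32, i8* }   ; psource is a pointer
//   [8 x i32]                                     ; kmp_critical_name
//
// mirroring the definitions in the runtime's kmp.h.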
1202 
1203 void CGOpenMPRuntime::clear() {
1204   InternalVars.clear();
1205 }
1206 
1207 static llvm::Function *
1208 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1209                           const Expr *CombinerInitializer, const VarDecl *In,
1210                           const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
1212   auto &C = CGM.getContext();
1213   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1214   FunctionArgList Args;
1215   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1216                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1217   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1218                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1219   Args.push_back(&OmpOutParm);
1220   Args.push_back(&OmpInParm);
1221   auto &FnInfo =
1222       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1223   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1224   auto *Fn = llvm::Function::Create(
1225       FnTy, llvm::GlobalValue::InternalLinkage,
1226       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
1227   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1228   Fn->removeFnAttr(llvm::Attribute::NoInline);
1229   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1230   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1231   CodeGenFunction CGF(CGM);
1232   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1233   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1234   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1235                     Out->getLocation());
1236   CodeGenFunction::OMPPrivateScope Scope(CGF);
1237   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1238   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
1239     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1240         .getAddress();
1241   });
1242   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1243   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
1244     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1245         .getAddress();
1246   });
1247   (void)Scope.Privatize();
1248   if (!IsCombiner && Out->hasInit() &&
1249       !CGF.isTrivialInitializer(Out->getInit())) {
1250     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1251                          Out->getType().getQualifiers(),
1252                          /*IsInitializer=*/true);
1253   }
1254   if (CombinerInitializer)
1255     CGF.EmitIgnoredExpr(CombinerInitializer);
1256   Scope.ForceCleanup();
1257   CGF.FinishFunction();
1258   return Fn;
1259 }
1260 
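// Lazily emits (and caches in UDRMap) the combiner and initializer helpers for
// a user-defined reduction. 'In'/'Out'/'Priv'/'Orig' hold the IdentifierInfos
// for the special omp_in/omp_out/omp_priv/omp_orig variables declared inside
// the OMPDeclareReductionDecl.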
1261 void CGOpenMPRuntime::emitUserDefinedReduction(
1262     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1263   if (UDRMap.count(D) > 0)
1264     return;
1265   auto &C = CGM.getContext();
1266   if (!In || !Out) {
1267     In = &C.Idents.get("omp_in");
1268     Out = &C.Idents.get("omp_out");
1269   }
1270   llvm::Function *Combiner = emitCombinerOrInitializer(
1271       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1272       cast<VarDecl>(D->lookup(Out).front()),
1273       /*IsCombiner=*/true);
1274   llvm::Function *Initializer = nullptr;
1275   if (auto *Init = D->getInitializer()) {
1276     if (!Priv || !Orig) {
1277       Priv = &C.Idents.get("omp_priv");
1278       Orig = &C.Idents.get("omp_orig");
1279     }
1280     Initializer = emitCombinerOrInitializer(
1281         CGM, D->getType(),
1282         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1283                                                                      : nullptr,
1284         cast<VarDecl>(D->lookup(Orig).front()),
1285         cast<VarDecl>(D->lookup(Priv).front()),
1286         /*IsCombiner=*/false);
1287   }
1288   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
1289   if (CGF) {
1290     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1291     Decls.second.push_back(D);
1292   }
1293 }
1294 
1295 std::pair<llvm::Function *, llvm::Function *>
1296 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1297   auto I = UDRMap.find(D);
1298   if (I != UDRMap.end())
1299     return I->second;
1300   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1301   return UDRMap.lookup(D);
1302 }
1303 
1304 // Layout information for ident_t.
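// In the OpenMP runtime this corresponds roughly to:
//   typedef struct ident {
//     kmp_int32 reserved_1;
//     kmp_int32 flags;
//     kmp_int32 reserved_2;
//     kmp_int32 reserved_3;
//     char const *psource;  // ";file;function;line;column;;"
//   } ident_t;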
1305 static CharUnits getIdentAlign(CodeGenModule &CGM) {
1306   return CGM.getPointerAlign();
1307 }
1308 static CharUnits getIdentSize(CodeGenModule &CGM) {
1309   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
1310   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
1311 }
1312 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
1313   // All the fields except the last are i32, so this works beautifully.
1314   return unsigned(Field) * CharUnits::fromQuantity(4);
1315 }
1316 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
1317                                    IdentFieldIndex Field,
1318                                    const llvm::Twine &Name = "") {
1319   auto Offset = getOffsetOfIdentField(Field);
1320   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
1321 }
1322 
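// Outlines the body of a 'parallel' or 'teams' region into a helper with the
// kmpc_micro-style signature, e.g. (sketch, names are illustrative):
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       <captured vars>...);
// The resulting function is later passed to __kmpc_fork_call or
// __kmpc_fork_teams.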
1323 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
1324     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1325     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1326     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1327   assert(ThreadIDVar->getType()->isPointerType() &&
1328          "thread id variable must be of type kmp_int32 *");
1329   CodeGenFunction CGF(CGM, true);
1330   bool HasCancel = false;
1331   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1332     HasCancel = OPD->hasCancel();
1333   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1334     HasCancel = OPSD->hasCancel();
1335   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1336     HasCancel = OPFD->hasCancel();
1337   else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1338     HasCancel = OPFD->hasCancel();
1339   else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1340     HasCancel = OPFD->hasCancel();
1341   else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1342     HasCancel = OPFD->hasCancel();
1343   else if (auto *OPFD =
1344                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1345     HasCancel = OPFD->hasCancel();
1346   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1347                                     HasCancel, OutlinedHelperName);
1348   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1349   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1350 }
1351 
1352 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1353     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1354     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1355   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1356   return emitParallelOrTeamsOutlinedFunction(
1357       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1358 }
1359 
1360 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1361     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1362     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1363   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1364   return emitParallelOrTeamsOutlinedFunction(
1365       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1366 }
1367 
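// Outlines the body of a task region. For untied tasks the UntiedCodeGen
// callback below re-enqueues the remainder of the task via __kmpc_omp_task,
// and the number of generated task parts is reported back through
// NumberOfParts.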
1368 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
1369     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1370     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1371     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1372     bool Tied, unsigned &NumberOfParts) {
1373   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1374                                               PrePostActionTy &) {
1375     auto *ThreadID = getThreadID(CGF, D.getLocStart());
1376     auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1377     llvm::Value *TaskArgs[] = {
1378         UpLoc, ThreadID,
1379         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1380                                     TaskTVar->getType()->castAs<PointerType>())
1381             .getPointer()};
1382     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1383   };
1384   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1385                                                             UntiedCodeGen);
1386   CodeGen.setAction(Action);
1387   assert(!ThreadIDVar->getType()->isPointerType() &&
1388          "thread id variable must be of type kmp_int32 for tasks");
1389   const OpenMPDirectiveKind Region =
1390       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1391                                                       : OMPD_task;
1392   auto *CS = D.getCapturedStmt(Region);
1393   auto *TD = dyn_cast<OMPTaskDirective>(&D);
1394   CodeGenFunction CGF(CGM, true);
1395   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1396                                         InnermostKind,
1397                                         TD ? TD->hasCancel() : false, Action);
1398   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1399   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
1400   if (!Tied)
1401     NumberOfParts = Action.getNumberOfParts();
1402   return Res;
1403 }
1404 
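// Returns (creating on first use) a private constant ident_t with the given
// flags and the default ";unknown;unknown;0;0;;" psource string; used whenever
// no debug info or source location is available.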
1405 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1406   CharUnits Align = getIdentAlign(CGM);
1407   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1408   if (!Entry) {
1409     if (!DefaultOpenMPPSource) {
1410       // Initialize default location for psource field of ident_t structure of
1411       // all ident_t objects. Format is ";file;function;line;column;;".
1412       // Taken from
1413       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1414       DefaultOpenMPPSource =
1415           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1416       DefaultOpenMPPSource =
1417           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1418     }
1419 
1420     ConstantInitBuilder builder(CGM);
1421     auto fields = builder.beginStruct(IdentTy);
1422     fields.addInt(CGM.Int32Ty, 0);
1423     fields.addInt(CGM.Int32Ty, Flags);
1424     fields.addInt(CGM.Int32Ty, 0);
1425     fields.addInt(CGM.Int32Ty, 0);
1426     fields.add(DefaultOpenMPPSource);
1427     auto DefaultOpenMPLocation =
1428       fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
1429                                    llvm::GlobalValue::PrivateLinkage);
1430     DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
1431 
1432     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1433   }
1434   return Address(Entry, Align);
1435 }
1436 
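// Builds the ident_t* location argument for a runtime call. With debug info,
// a per-function ".kmpc_loc.addr" temporary is memcpy'd from the default
// location and its psource field is rewritten per call site, e.g. to
// ";file.c;foo;42;7;;" (illustrative).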
1437 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1438                                                  SourceLocation Loc,
1439                                                  unsigned Flags) {
1440   Flags |= OMP_IDENT_KMPC;
1441   // If no debug info is generated, return the global default location.
1442   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1443       Loc.isInvalid())
1444     return getOrCreateDefaultLocation(Flags).getPointer();
1445 
1446   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1447 
1448   Address LocValue = Address::invalid();
1449   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1450   if (I != OpenMPLocThreadIDMap.end())
1451     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
1452 
1453   // OpenMPLocThreadIDMap may have a null DebugLoc and a non-null ThreadID if
1454   // getThreadID was called before this routine.
1455   if (!LocValue.isValid()) {
1456     // Generate "ident_t .kmpc_loc.addr;"
1457     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
1458                                       ".kmpc_loc.addr");
1459     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1460     Elem.second.DebugLoc = AI.getPointer();
1461     LocValue = AI;
1462 
1463     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1464     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1465     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1466                              CGM.getSize(getIdentSize(CGF.CGM)));
1467   }
1468 
1469   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1470   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
1471 
1472   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1473   if (OMPDebugLoc == nullptr) {
1474     SmallString<128> Buffer2;
1475     llvm::raw_svector_ostream OS2(Buffer2);
1476     // Build debug location
1477     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1478     OS2 << ";" << PLoc.getFilename() << ";";
1479     if (const FunctionDecl *FD =
1480             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
1481       OS2 << FD->getQualifiedNameAsString();
1482     }
1483     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1484     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1485     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1486   }
1487   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1488   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
1489 
1490   // Our callers always pass this to a runtime function, so for
1491   // convenience, go ahead and return a naked pointer.
1492   return LocValue.getPointer();
1493 }
1494 
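// Returns the global thread id for the current function, preferring (in order)
// a value already cached in OpenMPLocThreadIDMap, the thread id parameter of
// the enclosing outlined region, and finally a call to
// __kmpc_global_thread_num emitted in the entry block and cached for reuse.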
1495 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1496                                           SourceLocation Loc) {
1497   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1498 
1499   llvm::Value *ThreadID = nullptr;
1500   // Check whether we've already cached a load of the thread id in this
1501   // function.
1502   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1503   if (I != OpenMPLocThreadIDMap.end()) {
1504     ThreadID = I->second.ThreadID;
1505     if (ThreadID != nullptr)
1506       return ThreadID;
1507   }
1508   // If exceptions are enabled, do not use the parameter to avoid a possible crash.
1509   if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1510       !CGF.getLangOpts().CXXExceptions ||
1511       CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1512     if (auto *OMPRegionInfo =
1513             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1514       if (OMPRegionInfo->getThreadIDVariable()) {
1515         // Check if this is an outlined function with the thread id passed as argument.
1516         auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1517         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1518         // If the value was loaded in the entry block, cache it and reuse it
1519         // throughout the function.
1520         if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1521           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1522           Elem.second.ThreadID = ThreadID;
1523         }
1524         return ThreadID;
1525       }
1526     }
1527   }
1528 
1529   // This is not an outlined function region - need to call kmp_int32
1530   // __kmpc_global_thread_num(ident_t *loc).
1531   // Generate thread id value and cache this value for use across the
1532   // function.
1533   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1534   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1535   auto *Call = CGF.Builder.CreateCall(
1536       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1537       emitUpdateLocation(CGF, Loc));
1538   Call->setCallingConv(CGF.getRuntimeCC());
1539   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1540   Elem.second.ThreadID = Call;
1541   return Call;
1542 }
1543 
1544 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1545   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1546   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1547     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1548   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1549     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1550       UDRMap.erase(D);
1551     }
1552     FunctionUDRMap.erase(CGF.CurFn);
1553   }
1554 }
1555 
1556 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1559   return llvm::PointerType::getUnqual(IdentTy);
1560 }
1561 
1562 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1563   if (!Kmpc_MicroTy) {
1564     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1565     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1566                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1567     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1568   }
1569   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1570 }
1571 
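// Creates (or returns the existing declaration of) the requested libomp /
// libomptarget entry point with the matching LLVM signature, e.g. a sketch of
// the resulting IR declaration:
//   declare void @__kmpc_barrier(%ident_t*, i32)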
1572 llvm::Constant *
1573 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1574   llvm::Constant *RTLFn = nullptr;
1575   switch (static_cast<OpenMPRTLFunction>(Function)) {
1576   case OMPRTL__kmpc_fork_call: {
1577     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1578     // microtask, ...);
1579     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1580                                 getKmpc_MicroPointerTy()};
1581     llvm::FunctionType *FnTy =
1582         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1583     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1584     break;
1585   }
1586   case OMPRTL__kmpc_global_thread_num: {
1587     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1588     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1589     llvm::FunctionType *FnTy =
1590         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1591     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1592     break;
1593   }
1594   case OMPRTL__kmpc_threadprivate_cached: {
1595     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1596     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1597     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1598                                 CGM.VoidPtrTy, CGM.SizeTy,
1599                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1600     llvm::FunctionType *FnTy =
1601         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1602     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1603     break;
1604   }
1605   case OMPRTL__kmpc_critical: {
1606     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1607     // kmp_critical_name *crit);
1608     llvm::Type *TypeParams[] = {
1609         getIdentTyPointerTy(), CGM.Int32Ty,
1610         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1611     llvm::FunctionType *FnTy =
1612         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1613     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1614     break;
1615   }
1616   case OMPRTL__kmpc_critical_with_hint: {
1617     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1618     // kmp_critical_name *crit, uintptr_t hint);
1619     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1620                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1621                                 CGM.IntPtrTy};
1622     llvm::FunctionType *FnTy =
1623         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1624     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1625     break;
1626   }
1627   case OMPRTL__kmpc_threadprivate_register: {
1628     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1629     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1630     // typedef void *(*kmpc_ctor)(void *);
1631     auto KmpcCtorTy =
1632         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1633                                 /*isVarArg*/ false)->getPointerTo();
1634     // typedef void *(*kmpc_cctor)(void *, void *);
1635     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1636     auto KmpcCopyCtorTy =
1637         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1638                                 /*isVarArg*/ false)->getPointerTo();
1639     // typedef void (*kmpc_dtor)(void *);
1640     auto KmpcDtorTy =
1641         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1642             ->getPointerTo();
1643     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1644                               KmpcCopyCtorTy, KmpcDtorTy};
1645     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1646                                         /*isVarArg*/ false);
1647     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1648     break;
1649   }
1650   case OMPRTL__kmpc_end_critical: {
1651     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1652     // kmp_critical_name *crit);
1653     llvm::Type *TypeParams[] = {
1654         getIdentTyPointerTy(), CGM.Int32Ty,
1655         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1656     llvm::FunctionType *FnTy =
1657         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1658     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1659     break;
1660   }
1661   case OMPRTL__kmpc_cancel_barrier: {
1662     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1663     // global_tid);
1664     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1665     llvm::FunctionType *FnTy =
1666         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1667     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1668     break;
1669   }
1670   case OMPRTL__kmpc_barrier: {
1671     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1672     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1673     llvm::FunctionType *FnTy =
1674         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1675     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1676     break;
1677   }
1678   case OMPRTL__kmpc_for_static_fini: {
1679     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1680     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1681     llvm::FunctionType *FnTy =
1682         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1683     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1684     break;
1685   }
1686   case OMPRTL__kmpc_push_num_threads: {
1687     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1688     // kmp_int32 num_threads)
1689     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1690                                 CGM.Int32Ty};
1691     llvm::FunctionType *FnTy =
1692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1693     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1694     break;
1695   }
1696   case OMPRTL__kmpc_serialized_parallel: {
1697     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1698     // global_tid);
1699     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1700     llvm::FunctionType *FnTy =
1701         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1702     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1703     break;
1704   }
1705   case OMPRTL__kmpc_end_serialized_parallel: {
1706     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1707     // global_tid);
1708     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1709     llvm::FunctionType *FnTy =
1710         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1711     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1712     break;
1713   }
1714   case OMPRTL__kmpc_flush: {
1715     // Build void __kmpc_flush(ident_t *loc);
1716     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1717     llvm::FunctionType *FnTy =
1718         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1719     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1720     break;
1721   }
1722   case OMPRTL__kmpc_master: {
1723     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1724     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1725     llvm::FunctionType *FnTy =
1726         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1727     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1728     break;
1729   }
1730   case OMPRTL__kmpc_end_master: {
1731     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1732     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1733     llvm::FunctionType *FnTy =
1734         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1735     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1736     break;
1737   }
1738   case OMPRTL__kmpc_omp_taskyield: {
1739     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1740     // int end_part);
1741     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1742     llvm::FunctionType *FnTy =
1743         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1744     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1745     break;
1746   }
1747   case OMPRTL__kmpc_single: {
1748     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1749     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1750     llvm::FunctionType *FnTy =
1751         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1752     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1753     break;
1754   }
1755   case OMPRTL__kmpc_end_single: {
1756     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1757     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1758     llvm::FunctionType *FnTy =
1759         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1760     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1761     break;
1762   }
1763   case OMPRTL__kmpc_omp_task_alloc: {
1764     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1765     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1766     // kmp_routine_entry_t *task_entry);
1767     assert(KmpRoutineEntryPtrTy != nullptr &&
1768            "Type kmp_routine_entry_t must be created.");
1769     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1770                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1771     // Return void * and then cast to particular kmp_task_t type.
1772     llvm::FunctionType *FnTy =
1773         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1774     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1775     break;
1776   }
1777   case OMPRTL__kmpc_omp_task: {
1778     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1779     // *new_task);
1780     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1781                                 CGM.VoidPtrTy};
1782     llvm::FunctionType *FnTy =
1783         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1784     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1785     break;
1786   }
1787   case OMPRTL__kmpc_copyprivate: {
1788     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1789     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1790     // kmp_int32 didit);
1791     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1792     auto *CpyFnTy =
1793         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1794     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1795                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1796                                 CGM.Int32Ty};
1797     llvm::FunctionType *FnTy =
1798         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1799     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1800     break;
1801   }
1802   case OMPRTL__kmpc_reduce: {
1803     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1804     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1805     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1806     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1807     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1808                                                /*isVarArg=*/false);
1809     llvm::Type *TypeParams[] = {
1810         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1811         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1812         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1813     llvm::FunctionType *FnTy =
1814         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1815     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1816     break;
1817   }
1818   case OMPRTL__kmpc_reduce_nowait: {
1819     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1820     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1821     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1822     // *lck);
1823     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1824     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1825                                                /*isVarArg=*/false);
1826     llvm::Type *TypeParams[] = {
1827         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1828         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1829         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1830     llvm::FunctionType *FnTy =
1831         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1832     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1833     break;
1834   }
1835   case OMPRTL__kmpc_end_reduce: {
1836     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1837     // kmp_critical_name *lck);
1838     llvm::Type *TypeParams[] = {
1839         getIdentTyPointerTy(), CGM.Int32Ty,
1840         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1841     llvm::FunctionType *FnTy =
1842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1843     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1844     break;
1845   }
1846   case OMPRTL__kmpc_end_reduce_nowait: {
1847     // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1848     // kmp_critical_name *lck);
1849     llvm::Type *TypeParams[] = {
1850         getIdentTyPointerTy(), CGM.Int32Ty,
1851         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1852     llvm::FunctionType *FnTy =
1853         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1854     RTLFn =
1855         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1856     break;
1857   }
1858   case OMPRTL__kmpc_omp_task_begin_if0: {
1859     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
1860     // kmp_task_t *new_task);
1861     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1862                                 CGM.VoidPtrTy};
1863     llvm::FunctionType *FnTy =
1864         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1865     RTLFn =
1866         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1867     break;
1868   }
1869   case OMPRTL__kmpc_omp_task_complete_if0: {
1870     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1871     // kmp_task_t *new_task);
1872     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1873                                 CGM.VoidPtrTy};
1874     llvm::FunctionType *FnTy =
1875         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1877                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1878     break;
1879   }
1880   case OMPRTL__kmpc_ordered: {
1881     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1882     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1883     llvm::FunctionType *FnTy =
1884         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1885     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1886     break;
1887   }
1888   case OMPRTL__kmpc_end_ordered: {
1889     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1891     llvm::FunctionType *FnTy =
1892         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1893     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1894     break;
1895   }
1896   case OMPRTL__kmpc_omp_taskwait: {
1897     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1898     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899     llvm::FunctionType *FnTy =
1900         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1902     break;
1903   }
1904   case OMPRTL__kmpc_taskgroup: {
1905     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1906     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1907     llvm::FunctionType *FnTy =
1908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1910     break;
1911   }
1912   case OMPRTL__kmpc_end_taskgroup: {
1913     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1914     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1915     llvm::FunctionType *FnTy =
1916         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1917     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1918     break;
1919   }
1920   case OMPRTL__kmpc_push_proc_bind: {
1921     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1922     // int proc_bind)
1923     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1924     llvm::FunctionType *FnTy =
1925         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1926     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1927     break;
1928   }
1929   case OMPRTL__kmpc_omp_task_with_deps: {
1930     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1931     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1932     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1933     llvm::Type *TypeParams[] = {
1934         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1935         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1936     llvm::FunctionType *FnTy =
1937         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1938     RTLFn =
1939         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1940     break;
1941   }
1942   case OMPRTL__kmpc_omp_wait_deps: {
1943     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1944     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1945     // kmp_depend_info_t *noalias_dep_list);
1946     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1947                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1948                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1949     llvm::FunctionType *FnTy =
1950         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1951     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1952     break;
1953   }
1954   case OMPRTL__kmpc_cancellationpoint: {
1955     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1956     // global_tid, kmp_int32 cncl_kind)
1957     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1958     llvm::FunctionType *FnTy =
1959         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1960     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1961     break;
1962   }
1963   case OMPRTL__kmpc_cancel: {
1964     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1965     // kmp_int32 cncl_kind)
1966     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1967     llvm::FunctionType *FnTy =
1968         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1969     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1970     break;
1971   }
1972   case OMPRTL__kmpc_push_num_teams: {
1973     // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
1974     // kmp_int32 num_teams, kmp_int32 num_threads);
1975     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1976         CGM.Int32Ty};
1977     llvm::FunctionType *FnTy =
1978         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1979     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1980     break;
1981   }
1982   case OMPRTL__kmpc_fork_teams: {
1983     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1984     // microtask, ...);
1985     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1986                                 getKmpc_MicroPointerTy()};
1987     llvm::FunctionType *FnTy =
1988         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1989     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1990     break;
1991   }
1992   case OMPRTL__kmpc_taskloop: {
1993     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1994     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1995     // sched, kmp_uint64 grainsize, void *task_dup);
1996     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1997                                 CGM.IntTy,
1998                                 CGM.VoidPtrTy,
1999                                 CGM.IntTy,
2000                                 CGM.Int64Ty->getPointerTo(),
2001                                 CGM.Int64Ty->getPointerTo(),
2002                                 CGM.Int64Ty,
2003                                 CGM.IntTy,
2004                                 CGM.IntTy,
2005                                 CGM.Int64Ty,
2006                                 CGM.VoidPtrTy};
2007     llvm::FunctionType *FnTy =
2008         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2009     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2010     break;
2011   }
2012   case OMPRTL__kmpc_doacross_init: {
2013     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2014     // num_dims, struct kmp_dim *dims);
2015     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2016                                 CGM.Int32Ty,
2017                                 CGM.Int32Ty,
2018                                 CGM.VoidPtrTy};
2019     llvm::FunctionType *FnTy =
2020         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2021     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2022     break;
2023   }
2024   case OMPRTL__kmpc_doacross_fini: {
2025     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2026     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2027     llvm::FunctionType *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2030     break;
2031   }
2032   case OMPRTL__kmpc_doacross_post: {
2033     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2034     // *vec);
2035     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2036                                 CGM.Int64Ty->getPointerTo()};
2037     llvm::FunctionType *FnTy =
2038         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2039     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2040     break;
2041   }
2042   case OMPRTL__kmpc_doacross_wait: {
2043     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2044     // *vec);
2045     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2046                                 CGM.Int64Ty->getPointerTo()};
2047     llvm::FunctionType *FnTy =
2048         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2050     break;
2051   }
2052   case OMPRTL__kmpc_task_reduction_init: {
2053     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2054     // *data);
2055     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2056     llvm::FunctionType *FnTy =
2057         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2058     RTLFn =
2059         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2060     break;
2061   }
2062   case OMPRTL__kmpc_task_reduction_get_th_data: {
2063     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2064     // *d);
2065     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2066     llvm::FunctionType *FnTy =
2067         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(
2069         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2070     break;
2071   }
2072   case OMPRTL__tgt_target: {
2073     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2074     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2075     // *arg_types);
2076     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2077                                 CGM.VoidPtrTy,
2078                                 CGM.Int32Ty,
2079                                 CGM.VoidPtrPtrTy,
2080                                 CGM.VoidPtrPtrTy,
2081                                 CGM.SizeTy->getPointerTo(),
2082                                 CGM.Int64Ty->getPointerTo()};
2083     llvm::FunctionType *FnTy =
2084         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2085     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2086     break;
2087   }
2088   case OMPRTL__tgt_target_nowait: {
2089     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2090     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2091     // int64_t *arg_types);
2092     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2093                                 CGM.VoidPtrTy,
2094                                 CGM.Int32Ty,
2095                                 CGM.VoidPtrPtrTy,
2096                                 CGM.VoidPtrPtrTy,
2097                                 CGM.SizeTy->getPointerTo(),
2098                                 CGM.Int64Ty->getPointerTo()};
2099     llvm::FunctionType *FnTy =
2100         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2102     break;
2103   }
2104   case OMPRTL__tgt_target_teams: {
2105     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2106     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2107     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2108     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2109                                 CGM.VoidPtrTy,
2110                                 CGM.Int32Ty,
2111                                 CGM.VoidPtrPtrTy,
2112                                 CGM.VoidPtrPtrTy,
2113                                 CGM.SizeTy->getPointerTo(),
2114                                 CGM.Int64Ty->getPointerTo(),
2115                                 CGM.Int32Ty,
2116                                 CGM.Int32Ty};
2117     llvm::FunctionType *FnTy =
2118         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2119     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2120     break;
2121   }
2122   case OMPRTL__tgt_target_teams_nowait: {
2123     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2124     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2125     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2126     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2127                                 CGM.VoidPtrTy,
2128                                 CGM.Int32Ty,
2129                                 CGM.VoidPtrPtrTy,
2130                                 CGM.VoidPtrPtrTy,
2131                                 CGM.SizeTy->getPointerTo(),
2132                                 CGM.Int64Ty->getPointerTo(),
2133                                 CGM.Int32Ty,
2134                                 CGM.Int32Ty};
2135     llvm::FunctionType *FnTy =
2136         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2137     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2138     break;
2139   }
2140   case OMPRTL__tgt_register_lib: {
2141     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2142     QualType ParamTy =
2143         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2144     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2145     llvm::FunctionType *FnTy =
2146         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2147     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2148     break;
2149   }
2150   case OMPRTL__tgt_unregister_lib: {
2151     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2152     QualType ParamTy =
2153         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2154     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2155     llvm::FunctionType *FnTy =
2156         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2157     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2158     break;
2159   }
2160   case OMPRTL__tgt_target_data_begin: {
2161     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2162     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2163     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2164                                 CGM.Int32Ty,
2165                                 CGM.VoidPtrPtrTy,
2166                                 CGM.VoidPtrPtrTy,
2167                                 CGM.SizeTy->getPointerTo(),
2168                                 CGM.Int64Ty->getPointerTo()};
2169     llvm::FunctionType *FnTy =
2170         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2171     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2172     break;
2173   }
2174   case OMPRTL__tgt_target_data_begin_nowait: {
2175     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2176     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2177     // *arg_types);
2178     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2179                                 CGM.Int32Ty,
2180                                 CGM.VoidPtrPtrTy,
2181                                 CGM.VoidPtrPtrTy,
2182                                 CGM.SizeTy->getPointerTo(),
2183                                 CGM.Int64Ty->getPointerTo()};
2184     auto *FnTy =
2185         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2186     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2187     break;
2188   }
2189   case OMPRTL__tgt_target_data_end: {
2190     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2191     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2192     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2193                                 CGM.Int32Ty,
2194                                 CGM.VoidPtrPtrTy,
2195                                 CGM.VoidPtrPtrTy,
2196                                 CGM.SizeTy->getPointerTo(),
2197                                 CGM.Int64Ty->getPointerTo()};
2198     llvm::FunctionType *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2201     break;
2202   }
2203   case OMPRTL__tgt_target_data_end_nowait: {
2204     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2205     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2206     // *arg_types);
2207     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2208                                 CGM.Int32Ty,
2209                                 CGM.VoidPtrPtrTy,
2210                                 CGM.VoidPtrPtrTy,
2211                                 CGM.SizeTy->getPointerTo(),
2212                                 CGM.Int64Ty->getPointerTo()};
2213     auto *FnTy =
2214         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2215     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2216     break;
2217   }
2218   case OMPRTL__tgt_target_data_update: {
2219     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2220     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2221     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2222                                 CGM.Int32Ty,
2223                                 CGM.VoidPtrPtrTy,
2224                                 CGM.VoidPtrPtrTy,
2225                                 CGM.SizeTy->getPointerTo(),
2226                                 CGM.Int64Ty->getPointerTo()};
2227     llvm::FunctionType *FnTy =
2228         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2230     break;
2231   }
2232   case OMPRTL__tgt_target_data_update_nowait: {
2233     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2234     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2235     // *arg_types);
2236     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2237                                 CGM.Int32Ty,
2238                                 CGM.VoidPtrPtrTy,
2239                                 CGM.VoidPtrPtrTy,
2240                                 CGM.SizeTy->getPointerTo(),
2241                                 CGM.Int64Ty->getPointerTo()};
2242     auto *FnTy =
2243         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2244     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2245     break;
2246   }
2247   }
2248   assert(RTLFn && "Unable to find OpenMP runtime function");
2249   return RTLFn;
2250 }
2251 
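// Selects the __kmpc_for_static_init_{4,4u,8,8u} variant matching the loop
// iteration variable; e.g. a worksharing loop with a signed 32-bit IV uses
// __kmpc_for_static_init_4 (illustrative mapping of IVSize/IVSigned).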
2252 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2253                                                              bool IVSigned) {
2254   assert((IVSize == 32 || IVSize == 64) &&
2255          "IV size is not compatible with the omp runtime");
2256   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2257                                        : "__kmpc_for_static_init_4u")
2258                            : (IVSigned ? "__kmpc_for_static_init_8"
2259                                        : "__kmpc_for_static_init_8u");
2260   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2261   auto PtrTy = llvm::PointerType::getUnqual(ITy);
2262   llvm::Type *TypeParams[] = {
2263     getIdentTyPointerTy(),                     // loc
2264     CGM.Int32Ty,                               // tid
2265     CGM.Int32Ty,                               // schedtype
2266     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2267     PtrTy,                                     // p_lower
2268     PtrTy,                                     // p_upper
2269     PtrTy,                                     // p_stride
2270     ITy,                                       // incr
2271     ITy                                        // chunk
2272   };
2273   llvm::FunctionType *FnTy =
2274       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2275   return CGM.CreateRuntimeFunction(FnTy, Name);
2276 }
2277 
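// The __kmpc_dispatch_{init,fini,next}_* helpers below follow the same
// size/signedness naming scheme and drive dynamically scheduled loops,
// e.g. schedule(dynamic) or schedule(guided).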
2278 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2279                                                             bool IVSigned) {
2280   assert((IVSize == 32 || IVSize == 64) &&
2281          "IV size is not compatible with the omp runtime");
2282   auto Name =
2283       IVSize == 32
2284           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2285           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2286   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2287   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2288                                CGM.Int32Ty,           // tid
2289                                CGM.Int32Ty,           // schedtype
2290                                ITy,                   // lower
2291                                ITy,                   // upper
2292                                ITy,                   // stride
2293                                ITy                    // chunk
2294   };
2295   llvm::FunctionType *FnTy =
2296       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2297   return CGM.CreateRuntimeFunction(FnTy, Name);
2298 }
2299 
2300 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2301                                                             bool IVSigned) {
2302   assert((IVSize == 32 || IVSize == 64) &&
2303          "IV size is not compatible with the omp runtime");
2304   auto Name =
2305       IVSize == 32
2306           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2307           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2308   llvm::Type *TypeParams[] = {
2309       getIdentTyPointerTy(), // loc
2310       CGM.Int32Ty,           // tid
2311   };
2312   llvm::FunctionType *FnTy =
2313       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2314   return CGM.CreateRuntimeFunction(FnTy, Name);
2315 }
2316 
2317 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2318                                                             bool IVSigned) {
2319   assert((IVSize == 32 || IVSize == 64) &&
2320          "IV size is not compatible with the omp runtime");
2321   auto Name =
2322       IVSize == 32
2323           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2324           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2325   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2326   auto PtrTy = llvm::PointerType::getUnqual(ITy);
2327   llvm::Type *TypeParams[] = {
2328     getIdentTyPointerTy(),                     // loc
2329     CGM.Int32Ty,                               // tid
2330     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2331     PtrTy,                                     // p_lower
2332     PtrTy,                                     // p_upper
2333     PtrTy                                      // p_stride
2334   };
2335   llvm::FunctionType *FnTy =
2336       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2337   return CGM.CreateRuntimeFunction(FnTy, Name);
2338 }
2339 
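// For a variable listed in '#pragma omp declare target link(...)' this returns
// the address of the "<mangled-name>_decl_tgt_link_ptr" pointer through which
// the device side references the host variable; on the host the pointer is
// initialized to the variable's address and registered with the offload
// entries.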
2340 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2341   if (CGM.getLangOpts().OpenMPSimd)
2342     return Address::invalid();
2343   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2344       isDeclareTargetDeclaration(VD);
2345   if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2346     SmallString<64> PtrName;
2347     {
2348       llvm::raw_svector_ostream OS(PtrName);
2349       OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2350     }
2351     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2352     if (!Ptr) {
2353       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2354       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2355                                         PtrName);
2356       if (!CGM.getLangOpts().OpenMPIsDevice) {
2357         auto *GV = cast<llvm::GlobalVariable>(Ptr);
2358         GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2359         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2360       }
2361       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2362       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2363     }
2364     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2365   }
2366   return Address::invalid();
2367 }
2368 
2369 llvm::Constant *
2370 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert((!CGM.getLangOpts().OpenMPUseTLS ||
          !CGM.getContext().getTargetInfo().isTLSSupported()) &&
         "Cache is used only when TLS-based threadprivate is unavailable.");
2373   // Lookup the entry, lazily creating it if necessary.
2374   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
2375                                      Twine(CGM.getMangledName(VD)) + ".cache.");
2376 }
2377 
2378 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2379                                                 const VarDecl *VD,
2380                                                 Address VDAddr,
2381                                                 SourceLocation Loc) {
2382   if (CGM.getLangOpts().OpenMPUseTLS &&
2383       CGM.getContext().getTargetInfo().isTLSSupported())
2384     return VDAddr;
2385 
2386   auto VarTy = VDAddr.getElementType();
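  // Build call void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 tid,
  //          void *data, size_t size, void ***cache);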
2387   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2388                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2389                                                        CGM.Int8PtrTy),
2390                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2391                          getOrCreateThreadPrivateCache(VD)};
2392   return Address(CGF.EmitRuntimeCall(
2393       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2394                  VDAddr.getAlignment());
2395 }
2396 
2397 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2398     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2399     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2400   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2401   // library.
2402   auto OMPLoc = emitUpdateLocation(CGF, Loc);
2403   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2404                       OMPLoc);
2405   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2406   // to register constructor/destructor for variable.
2407   llvm::Value *Args[] = {OMPLoc,
2408                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2409                                                        CGM.VoidPtrTy),
2410                          Ctor, CopyCtor, Dtor};
2411   CGF.EmitRuntimeCall(
2412       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2413 }
2414 
2415 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2416     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2417     bool PerformInit, CodeGenFunction *CGF) {
2418   if (CGM.getLangOpts().OpenMPUseTLS &&
2419       CGM.getContext().getTargetInfo().isTLSSupported())
2420     return nullptr;
2421 
2422   VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(VD).second) {
2425     QualType ASTTy = VD->getType();
2426 
2427     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2428     auto Init = VD->getAnyInitializer();
2429     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2430       // Generate function that re-emits the declaration's initializer into the
2431       // threadprivate copy of the variable VD
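      // (the function receives the address of the copy as its only 'void *'
      // argument, constructs the object there and returns the same address).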
2432       CodeGenFunction CtorCGF(CGM);
2433       FunctionArgList Args;
2434       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2435                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2436                             ImplicitParamDecl::Other);
2437       Args.push_back(&Dst);
2438 
2439       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2440           CGM.getContext().VoidPtrTy, Args);
2441       auto FTy = CGM.getTypes().GetFunctionType(FI);
2442       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
2443           FTy, ".__kmpc_global_ctor_.", FI, Loc);
2444       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2445                             Args, Loc, Loc);
2446       auto ArgVal = CtorCGF.EmitLoadOfScalar(
2447           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2448           CGM.getContext().VoidPtrTy, Dst.getLocation());
2449       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2450       Arg = CtorCGF.Builder.CreateElementBitCast(
2451           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2452       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2453                                /*IsInitializer=*/true);
2454       ArgVal = CtorCGF.EmitLoadOfScalar(
2455           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2456           CGM.getContext().VoidPtrTy, Dst.getLocation());
2457       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2458       CtorCGF.FinishFunction();
2459       Ctor = Fn;
2460     }
2461     if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
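      // (the function receives the address of the copy as its only 'void *'
      // argument and runs the destructor for the object stored there).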
2464       CodeGenFunction DtorCGF(CGM);
2465       FunctionArgList Args;
2466       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2467                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2468                             ImplicitParamDecl::Other);
2469       Args.push_back(&Dst);
2470 
2471       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2472           CGM.getContext().VoidTy, Args);
2473       auto FTy = CGM.getTypes().GetFunctionType(FI);
2474       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
2475           FTy, ".__kmpc_global_dtor_.", FI, Loc);
2476       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2477       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2478                             Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
2480       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2481       auto ArgVal = DtorCGF.EmitLoadOfScalar(
2482           DtorCGF.GetAddrOfLocalVar(&Dst),
2483           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2484       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2485                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2486                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2487       DtorCGF.FinishFunction();
2488       Dtor = Fn;
2489     }
2490     // Do not emit init function if it is not required.
2491     if (!Ctor && !Dtor)
2492       return nullptr;
2493 
2494     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2495     auto CopyCtorTy =
2496         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2497                                 /*isVarArg=*/false)->getPointerTo();
2498     // Copying constructor for the threadprivate variable.
    // Must be NULL: this parameter is reserved by the runtime, which currently
    // requires it to always be NULL and fires an assertion otherwise.
2501     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2502     if (Ctor == nullptr) {
2503       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2504                                             /*isVarArg=*/false)->getPointerTo();
2505       Ctor = llvm::Constant::getNullValue(CtorTy);
2506     }
2507     if (Dtor == nullptr) {
2508       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2509                                             /*isVarArg=*/false)->getPointerTo();
2510       Dtor = llvm::Constant::getNullValue(DtorTy);
2511     }
2512     if (!CGF) {
2513       auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
2515       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2516           InitFunctionTy, ".__omp_threadprivate_init_.",
2517           CGM.getTypes().arrangeNullaryFunction());
2518       CodeGenFunction InitCGF(CGM);
2519       FunctionArgList ArgList;
2520       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2521                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2522                             Loc, Loc);
2523       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2524       InitCGF.FinishFunction();
2525       return InitFunction;
2526     }
2527     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2528   }
2529   return nullptr;
2530 }
2531 
2532 /// \brief Obtain information that uniquely identifies a target entry. This
2533 /// consists of the file and device IDs as well as line number associated with
2534 /// the relevant entry source location.
2535 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2536                                      unsigned &DeviceID, unsigned &FileID,
2537                                      unsigned &LineNum) {
2538 
2539   auto &SM = C.getSourceManager();
2540 
  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).
2544   assert(Loc.isValid() && "Source location is expected to be always valid.");
2545   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
2546 
2547   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2548   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2549 
2550   llvm::sys::fs::UniqueID ID;
2551   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2552     llvm_unreachable("Source file with target region no longer exists!");
2553 
2554   DeviceID = ID.getDevice();
2555   FileID = ID.getFile();
2556   LineNum = PLoc.getLine();
2557 }
2558 
2559 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2560                                                      llvm::GlobalVariable *Addr,
2561                                                      bool PerformInit) {
2562   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2563       isDeclareTargetDeclaration(VD);
2564   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2565     return false;
2566   VD = VD->getDefinition(CGM.getContext());
2567   if (VD && !DeclareTargetWithDefinition.insert(VD).second)
2568     return CGM.getLangOpts().OpenMPIsDevice;
2569 
2570   QualType ASTTy = VD->getType();
2571 
2572   SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know does not
  // conflict with any target region.
2576   unsigned DeviceID;
2577   unsigned FileID;
2578   unsigned Line;
2579   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2580   SmallString<128> Buffer, Out;
2581   {
2582     llvm::raw_svector_ostream OS(Buffer);
2583     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2584        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2585   }
2586 
2587   const Expr *Init = VD->getAnyInitializer();
2588   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2589     llvm::Constant *Ctor;
2590     llvm::Constant *ID;
2591     if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the device copy of the variable VD
2594       CodeGenFunction CtorCGF(CGM);
2595 
2596       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2597       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2598       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2599           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2600       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2601       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2602                             FunctionArgList(), Loc, Loc);
2603       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2604       CtorCGF.EmitAnyExprToMem(Init,
2605                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2606                                Init->getType().getQualifiers(),
2607                                /*IsInitializer=*/true);
2608       CtorCGF.FinishFunction();
2609       Ctor = Fn;
2610       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2611     } else {
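      // On the host only a placeholder global is emitted; it provides the
      // address used to register the offloading entry that pairs with the
      // ctor emitted in the device code.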
2612       Ctor = new llvm::GlobalVariable(
2613           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2614           llvm::GlobalValue::PrivateLinkage,
2615           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2616       ID = Ctor;
2617     }
2618 
2619     // Register the information for the entry associated with the constructor.
2620     Out.clear();
2621     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2622         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2623         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2624   }
2625   if (VD->getType().isDestructedType() != QualType::DK_none) {
2626     llvm::Constant *Dtor;
2627     llvm::Constant *ID;
2628     if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the device copy of
      // the variable VD
2631       CodeGenFunction DtorCGF(CGM);
2632 
2633       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2634       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2635       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2636           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2637       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2638       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2639                             FunctionArgList(), Loc, Loc);
2640       // Create a scope with an artificial location for the body of this
2641       // function.
2642       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2643       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2644                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2645                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2646       DtorCGF.FinishFunction();
2647       Dtor = Fn;
2648       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2649     } else {
2650       Dtor = new llvm::GlobalVariable(
2651           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2652           llvm::GlobalValue::PrivateLinkage,
2653           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2654       ID = Dtor;
2655     }
2656     // Register the information for the entry associated with the destructor.
2657     Out.clear();
2658     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2659         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2660         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2661   }
2662   return CGM.getLangOpts().OpenMPIsDevice;
2663 }
2664 
2665 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2666                                                           QualType VarType,
2667                                                           StringRef Name) {
2668   llvm::Twine VarName(Name, ".artificial.");
2669   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2670   llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
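  // Build call void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 tid,
  //          void *data, size_t size, void ***cache);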
2671   llvm::Value *Args[] = {
2672       emitUpdateLocation(CGF, SourceLocation()),
2673       getThreadID(CGF, SourceLocation()),
2674       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2675       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2676                                 /*IsSigned=*/false),
2677       getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2678   return Address(
2679       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2680           CGF.EmitRuntimeCall(
2681               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2682           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2683       CGM.getPointerAlign());
2684 }
2685 
/// \brief Emits code for an OpenMP 'if' clause using the specified \a ThenGen
/// and \a ElseGen code generation functions. Here is the logic:
2688 /// if (Cond) {
2689 ///   ThenGen();
2690 /// } else {
2691 ///   ElseGen();
2692 /// }
2693 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2694                                       const RegionCodeGenTy &ThenGen,
2695                                       const RegionCodeGenTy &ElseGen) {
2696   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2697 
2698   // If the condition constant folds and can be elided, try to avoid emitting
2699   // the condition and the dead arm of the if/else.
2700   bool CondConstant;
2701   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2702     if (CondConstant)
2703       ThenGen(CGF);
2704     else
2705       ElseGen(CGF);
2706     return;
2707   }
2708 
2709   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2710   // emit the conditional branch.
2711   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
2712   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
2713   auto ContBlock = CGF.createBasicBlock("omp_if.end");
2714   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2715 
2716   // Emit the 'then' code.
2717   CGF.EmitBlock(ThenBlock);
2718   ThenGen(CGF);
2719   CGF.EmitBranch(ContBlock);
2720   // Emit the 'else' code if present.
2721   // There is no need to emit line number for unconditional branch.
2722   (void)ApplyDebugLocation::CreateEmpty(CGF);
2723   CGF.EmitBlock(ElseBlock);
2724   ElseGen(CGF);
2725   // There is no need to emit line number for unconditional branch.
2726   (void)ApplyDebugLocation::CreateEmpty(CGF);
2727   CGF.EmitBranch(ContBlock);
2728   // Emit the continuation block for code after the if.
2729   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2730 }
2731 
2732 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2733                                        llvm::Value *OutlinedFn,
2734                                        ArrayRef<llvm::Value *> CapturedVars,
2735                                        const Expr *IfCond) {
2736   if (!CGF.HaveInsertPoint())
2737     return;
2738   auto *RTLoc = emitUpdateLocation(CGF, Loc);
2739   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2740                                                      PrePostActionTy &) {
2741     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2742     auto &RT = CGF.CGM.getOpenMPRuntime();
2743     llvm::Value *Args[] = {
2744         RTLoc,
2745         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2746         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2747     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2748     RealArgs.append(std::begin(Args), std::end(Args));
2749     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2750 
2751     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2752     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2753   };
2754   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2755                                                           PrePostActionTy &) {
2756     auto &RT = CGF.CGM.getOpenMPRuntime();
2757     auto ThreadID = RT.getThreadID(CGF, Loc);
2758     // Build calls:
2759     // __kmpc_serialized_parallel(&Loc, GTid);
2760     llvm::Value *Args[] = {RTLoc, ThreadID};
2761     CGF.EmitRuntimeCall(
2762         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2763 
2764     // OutlinedFn(&GTid, &zero, CapturedStruct);
2765     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2766     Address ZeroAddr =
2767         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
2768                              /*Name*/ ".zero.addr");
2769     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2770     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2771     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2772     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2773     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2774     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2775 
2776     // __kmpc_end_serialized_parallel(&Loc, GTid);
2777     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2778     CGF.EmitRuntimeCall(
2779         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2780         EndArgs);
2781   };
2782   if (IfCond)
2783     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2784   else {
2785     RegionCodeGenTy ThenRCG(ThenGen);
2786     ThenRCG(CGF);
2787   }
2788 }
2789 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in regular serial code, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
// and return the address of that temp.
2796 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2797                                              SourceLocation Loc) {
2798   if (auto *OMPRegionInfo =
2799           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2800     if (OMPRegionInfo->getThreadIDVariable())
2801       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2802 
2803   auto ThreadID = getThreadID(CGF, Loc);
2804   auto Int32Ty =
2805       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2806   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2807   CGF.EmitStoreOfScalar(ThreadID,
2808                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2809 
2810   return ThreadIDTemp;
2811 }
2812 
2813 llvm::Constant *
2814 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2815                                              const llvm::Twine &Name) {
2816   SmallString<256> Buffer;
2817   llvm::raw_svector_ostream Out(Buffer);
2818   Out << Name;
2819   auto RuntimeName = Out.str();
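  // Look the name up in the cache of internal variables; lazily create a
  // common zero-initialized global for it if it is not there yet.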
2820   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2821   if (Elem.second) {
2822     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2823            "OMP internal variable has different type than requested");
2824     return &*Elem.second;
2825   }
2826 
2827   return Elem.second = new llvm::GlobalVariable(
2828              CGM.getModule(), Ty, /*IsConstant*/ false,
2829              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2830              Elem.first());
2831 }
2832 
2833 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2834   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2835   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2836 }
2837 
2838 namespace {
2839 /// Common pre(post)-action for different OpenMP constructs.
2840 class CommonActionTy final : public PrePostActionTy {
2841   llvm::Value *EnterCallee;
2842   ArrayRef<llvm::Value *> EnterArgs;
2843   llvm::Value *ExitCallee;
2844   ArrayRef<llvm::Value *> ExitArgs;
2845   bool Conditional;
2846   llvm::BasicBlock *ContBlock = nullptr;
2847 
2848 public:
2849   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2850                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2851                  bool Conditional = false)
2852       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2853         ExitArgs(ExitArgs), Conditional(Conditional) {}
2854   void Enter(CodeGenFunction &CGF) override {
2855     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2856     if (Conditional) {
2857       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2858       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2859       ContBlock = CGF.createBasicBlock("omp_if.end");
2860       // Generate the branch (If-stmt)
2861       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2862       CGF.EmitBlock(ThenBlock);
2863     }
2864   }
2865   void Done(CodeGenFunction &CGF) {
2866     // Emit the rest of blocks/branches
2867     CGF.EmitBranch(ContBlock);
2868     CGF.EmitBlock(ContBlock, true);
2869   }
2870   void Exit(CodeGenFunction &CGF) override {
2871     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2872   }
2873 };
2874 } // anonymous namespace
2875 
2876 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2877                                          StringRef CriticalName,
2878                                          const RegionCodeGenTy &CriticalOpGen,
2879                                          SourceLocation Loc, const Expr *Hint) {
2880   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2881   // CriticalOpGen();
2882   // __kmpc_end_critical(ident_t *, gtid, Lock);
2883   // Prepare arguments and build a call to __kmpc_critical
2884   if (!CGF.HaveInsertPoint())
2885     return;
2886   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2887                          getCriticalRegionLock(CriticalName)};
2888   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2889                                                 std::end(Args));
2890   if (Hint) {
2891     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2892         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2893   }
2894   CommonActionTy Action(
2895       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2896                                  : OMPRTL__kmpc_critical),
2897       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2898   CriticalOpGen.setAction(Action);
2899   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2900 }
2901 
2902 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2903                                        const RegionCodeGenTy &MasterOpGen,
2904                                        SourceLocation Loc) {
2905   if (!CGF.HaveInsertPoint())
2906     return;
2907   // if(__kmpc_master(ident_t *, gtid)) {
2908   //   MasterOpGen();
2909   //   __kmpc_end_master(ident_t *, gtid);
2910   // }
2911   // Prepare arguments and build a call to __kmpc_master
2912   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2913   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2914                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2915                         /*Conditional=*/true);
2916   MasterOpGen.setAction(Action);
2917   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2918   Action.Done(CGF);
2919 }
2920 
2921 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2922                                         SourceLocation Loc) {
2923   if (!CGF.HaveInsertPoint())
2924     return;
2925   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2926   llvm::Value *Args[] = {
2927       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2928       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2929   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2930   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2931     Region->emitUntiedSwitch(CGF);
2932 }
2933 
2934 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2935                                           const RegionCodeGenTy &TaskgroupOpGen,
2936                                           SourceLocation Loc) {
2937   if (!CGF.HaveInsertPoint())
2938     return;
2939   // __kmpc_taskgroup(ident_t *, gtid);
2940   // TaskgroupOpGen();
2941   // __kmpc_end_taskgroup(ident_t *, gtid);
2942   // Prepare arguments and build a call to __kmpc_taskgroup
2943   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2944   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2945                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2946                         Args);
2947   TaskgroupOpGen.setAction(Action);
2948   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2949 }
2950 
2951 /// Given an array of pointers to variables, project the address of a
2952 /// given variable.
2953 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2954                                       unsigned Index, const VarDecl *Var) {
2955   // Pull out the pointer to the variable.
2956   Address PtrAddr =
2957       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2958   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2959 
2960   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2961   Addr = CGF.Builder.CreateElementBitCast(
2962       Addr, CGF.ConvertTypeForMem(Var->getType()));
2963   return Addr;
2964 }
2965 
2966 static llvm::Value *emitCopyprivateCopyFunction(
2967     CodeGenModule &CGM, llvm::Type *ArgsType,
2968     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2969     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2970     SourceLocation Loc) {
2971   auto &C = CGM.getContext();
2972   // void copy_func(void *LHSArg, void *RHSArg);
2973   FunctionArgList Args;
2974   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2975                            ImplicitParamDecl::Other);
2976   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2977                            ImplicitParamDecl::Other);
2978   Args.push_back(&LHSArg);
2979   Args.push_back(&RHSArg);
2980   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2981   auto *Fn = llvm::Function::Create(
2982       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2983       ".omp.copyprivate.copy_func", &CGM.getModule());
2984   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2985   Fn->setDoesNotRecurse();
2986   CodeGenFunction CGF(CGM);
2987   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2988   // Dest = (void*[n])(LHSArg);
2989   // Src = (void*[n])(RHSArg);
2990   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2991       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2992       ArgsType), CGF.getPointerAlign());
2993   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2994       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2995       ArgsType), CGF.getPointerAlign());
2996   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2997   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2998   // ...
2999   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3000   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3001     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3002     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3003 
3004     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3005     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3006 
3007     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3008     QualType Type = VD->getType();
3009     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3010   }
3011   CGF.FinishFunction();
3012   return Fn;
3013 }
3014 
3015 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3016                                        const RegionCodeGenTy &SingleOpGen,
3017                                        SourceLocation Loc,
3018                                        ArrayRef<const Expr *> CopyprivateVars,
3019                                        ArrayRef<const Expr *> SrcExprs,
3020                                        ArrayRef<const Expr *> DstExprs,
3021                                        ArrayRef<const Expr *> AssignmentOps) {
3022   if (!CGF.HaveInsertPoint())
3023     return;
3024   assert(CopyprivateVars.size() == SrcExprs.size() &&
3025          CopyprivateVars.size() == DstExprs.size() &&
3026          CopyprivateVars.size() == AssignmentOps.size());
3027   auto &C = CGM.getContext();
3028   // int32 did_it = 0;
3029   // if(__kmpc_single(ident_t *, gtid)) {
3030   //   SingleOpGen();
3031   //   __kmpc_end_single(ident_t *, gtid);
3032   //   did_it = 1;
3033   // }
3034   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3035   // <copy_func>, did_it);
3036 
3037   Address DidIt = Address::invalid();
3038   if (!CopyprivateVars.empty()) {
3039     // int32 did_it = 0;
3040     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3041     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3042     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3043   }
3044   // Prepare arguments and build a call to __kmpc_single
3045   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3046   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3047                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3048                         /*Conditional=*/true);
3049   SingleOpGen.setAction(Action);
3050   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3051   if (DidIt.isValid()) {
3052     // did_it = 1;
3053     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3054   }
3055   Action.Done(CGF);
3056   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3057   // <copy_func>, did_it);
3058   if (DidIt.isValid()) {
3059     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3060     auto CopyprivateArrayTy =
3061         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3062                                /*IndexTypeQuals=*/0);
3063     // Create a list of all private variables for copyprivate.
3064     Address CopyprivateList =
3065         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3066     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3067       Address Elem = CGF.Builder.CreateConstArrayGEP(
3068           CopyprivateList, I, CGF.getPointerSize());
3069       CGF.Builder.CreateStore(
3070           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3071               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3072           Elem);
3073     }
    // Build function that copies private values from the single region to all
    // other threads in the corresponding parallel region.
3076     auto *CpyFn = emitCopyprivateCopyFunction(
3077         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3078         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3079     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3080     Address CL =
3081       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3082                                                       CGF.VoidPtrTy);
3083     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
3084     llvm::Value *Args[] = {
3085         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3086         getThreadID(CGF, Loc),        // i32 <gtid>
3087         BufSize,                      // size_t <buf_size>
3088         CL.getPointer(),              // void *<copyprivate list>
3089         CpyFn,                        // void (*) (void *, void *) <copy_func>
3090         DidItVal                      // i32 did_it
3091     };
3092     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3093   }
3094 }
3095 
3096 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3097                                         const RegionCodeGenTy &OrderedOpGen,
3098                                         SourceLocation Loc, bool IsThreads) {
3099   if (!CGF.HaveInsertPoint())
3100     return;
3101   // __kmpc_ordered(ident_t *, gtid);
3102   // OrderedOpGen();
3103   // __kmpc_end_ordered(ident_t *, gtid);
3104   // Prepare arguments and build a call to __kmpc_ordered
3105   if (IsThreads) {
3106     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3107     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3108                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3109                           Args);
3110     OrderedOpGen.setAction(Action);
3111     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3112     return;
3113   }
3114   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3115 }
3116 
3117 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3118                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3119                                       bool ForceSimpleCall) {
3120   if (!CGF.HaveInsertPoint())
3121     return;
3122   // Build call __kmpc_cancel_barrier(loc, thread_id);
3123   // Build call __kmpc_barrier(loc, thread_id);
3124   unsigned Flags;
3125   if (Kind == OMPD_for)
3126     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3127   else if (Kind == OMPD_sections)
3128     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3129   else if (Kind == OMPD_single)
3130     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3131   else if (Kind == OMPD_barrier)
3132     Flags = OMP_IDENT_BARRIER_EXPL;
3133   else
3134     Flags = OMP_IDENT_BARRIER_IMPL;
3135   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3136   // thread_id);
3137   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3138                          getThreadID(CGF, Loc)};
3139   if (auto *OMPRegionInfo =
3140           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3141     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3142       auto *Result = CGF.EmitRuntimeCall(
3143           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3144       if (EmitChecks) {
3145         // if (__kmpc_cancel_barrier()) {
3146         //   exit from construct;
3147         // }
3148         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3149         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3150         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3151         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3152         CGF.EmitBlock(ExitBB);
3153         //   exit from construct;
3154         auto CancelDestination =
3155             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3156         CGF.EmitBranchThroughCleanup(CancelDestination);
3157         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3158       }
3159       return;
3160     }
3161   }
3162   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3163 }
3164 
3165 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
3166 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3167                                           bool Chunked, bool Ordered) {
3168   switch (ScheduleKind) {
3169   case OMPC_SCHEDULE_static:
3170     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3171                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3172   case OMPC_SCHEDULE_dynamic:
3173     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3174   case OMPC_SCHEDULE_guided:
3175     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3176   case OMPC_SCHEDULE_runtime:
3177     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3178   case OMPC_SCHEDULE_auto:
3179     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3180   case OMPC_SCHEDULE_unknown:
3181     assert(!Chunked && "chunk was specified but schedule kind not known");
3182     return Ordered ? OMP_ord_static : OMP_sch_static;
3183   }
3184   llvm_unreachable("Unexpected runtime schedule");
3185 }
3186 
3187 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
3188 static OpenMPSchedType
3189 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only the 'static' schedule kind is allowed for dist_schedule.
3191   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3192 }
3193 
3194 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3195                                          bool Chunked) const {
3196   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3197   return Schedule == OMP_sch_static;
3198 }
3199 
3200 bool CGOpenMPRuntime::isStaticNonchunked(
3201     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3202   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3203   return Schedule == OMP_dist_sch_static;
3204 }
3205 
3206 
3207 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3208   auto Schedule =
3209       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3210   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3211   return Schedule != OMP_sch_static;
3212 }
3213 
3214 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3215                                   OpenMPScheduleClauseModifier M1,
3216                                   OpenMPScheduleClauseModifier M2) {
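  // OR the schedule type with the monotonic/nonmonotonic modifier bits; the
  // 'simd' modifier upgrades a static chunked schedule to the balanced
  // chunked variant expected by the runtime.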
3217   int Modifier = 0;
3218   switch (M1) {
3219   case OMPC_SCHEDULE_MODIFIER_monotonic:
3220     Modifier = OMP_sch_modifier_monotonic;
3221     break;
3222   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3223     Modifier = OMP_sch_modifier_nonmonotonic;
3224     break;
3225   case OMPC_SCHEDULE_MODIFIER_simd:
3226     if (Schedule == OMP_sch_static_chunked)
3227       Schedule = OMP_sch_static_balanced_chunked;
3228     break;
3229   case OMPC_SCHEDULE_MODIFIER_last:
3230   case OMPC_SCHEDULE_MODIFIER_unknown:
3231     break;
3232   }
3233   switch (M2) {
3234   case OMPC_SCHEDULE_MODIFIER_monotonic:
3235     Modifier = OMP_sch_modifier_monotonic;
3236     break;
3237   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3238     Modifier = OMP_sch_modifier_nonmonotonic;
3239     break;
3240   case OMPC_SCHEDULE_MODIFIER_simd:
3241     if (Schedule == OMP_sch_static_chunked)
3242       Schedule = OMP_sch_static_balanced_chunked;
3243     break;
3244   case OMPC_SCHEDULE_MODIFIER_last:
3245   case OMPC_SCHEDULE_MODIFIER_unknown:
3246     break;
3247   }
3248   return Schedule | Modifier;
3249 }
3250 
3251 void CGOpenMPRuntime::emitForDispatchInit(
3252     CodeGenFunction &CGF, SourceLocation Loc,
3253     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3254     bool Ordered, const DispatchRTInput &DispatchValues) {
3255   if (!CGF.HaveInsertPoint())
3256     return;
3257   OpenMPSchedType Schedule = getRuntimeSchedule(
3258       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3259   assert(Ordered ||
3260          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3261           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3262           Schedule != OMP_sch_static_balanced_chunked));
3263   // Call __kmpc_dispatch_init(
3264   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3265   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3266   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3267 
3268   // If the Chunk was not specified in the clause - use default value 1.
3269   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3270                                             : CGF.Builder.getIntN(IVSize, 1);
3271   llvm::Value *Args[] = {
3272       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3273       CGF.Builder.getInt32(addMonoNonMonoModifier(
3274           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3275       DispatchValues.LB,                                // Lower
3276       DispatchValues.UB,                                // Upper
3277       CGF.Builder.getIntN(IVSize, 1),                   // Stride
3278       Chunk                                             // Chunk
3279   };
3280   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3281 }
3282 
3283 static void emitForStaticInitCall(
3284     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3285     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
3286     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3287     const CGOpenMPRuntime::StaticRTInput &Values) {
3288   if (!CGF.HaveInsertPoint())
3289     return;
3290 
3291   assert(!Values.Ordered);
3292   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3293          Schedule == OMP_sch_static_balanced_chunked ||
3294          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3295          Schedule == OMP_dist_sch_static ||
3296          Schedule == OMP_dist_sch_static_chunked);
3297 
3298   // Call __kmpc_for_static_init(
3299   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3300   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3301   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3302   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3303   llvm::Value *Chunk = Values.Chunk;
3304   if (Chunk == nullptr) {
3305     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3306             Schedule == OMP_dist_sch_static) &&
3307            "expected static non-chunked schedule");
3308     // If the Chunk was not specified in the clause - use default value 1.
3309     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3310   } else {
3311     assert((Schedule == OMP_sch_static_chunked ||
3312             Schedule == OMP_sch_static_balanced_chunked ||
3313             Schedule == OMP_ord_static_chunked ||
3314             Schedule == OMP_dist_sch_static_chunked) &&
3315            "expected static chunked schedule");
3316   }
3317   llvm::Value *Args[] = {
3318       UpdateLocation,
3319       ThreadId,
3320       CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3321                                                   M2)), // Schedule type
3322       Values.IL.getPointer(),                           // &isLastIter
3323       Values.LB.getPointer(),                           // &LB
3324       Values.UB.getPointer(),                           // &UB
3325       Values.ST.getPointer(),                           // &Stride
3326       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3327       Chunk                                             // Chunk
3328   };
3329   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3330 }
3331 
3332 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3333                                         SourceLocation Loc,
3334                                         OpenMPDirectiveKind DKind,
3335                                         const OpenMPScheduleTy &ScheduleKind,
3336                                         const StaticRTInput &Values) {
3337   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3338       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3339   assert(isOpenMPWorksharingDirective(DKind) &&
3340          "Expected loop-based or sections-based directive.");
3341   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3342                                              isOpenMPLoopDirective(DKind)
3343                                                  ? OMP_IDENT_WORK_LOOP
3344                                                  : OMP_IDENT_WORK_SECTIONS);
3345   auto *ThreadId = getThreadID(CGF, Loc);
3346   auto *StaticInitFunction =
3347       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3348   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3349                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3350 }
3351 
3352 void CGOpenMPRuntime::emitDistributeStaticInit(
3353     CodeGenFunction &CGF, SourceLocation Loc,
3354     OpenMPDistScheduleClauseKind SchedKind,
3355     const CGOpenMPRuntime::StaticRTInput &Values) {
3356   OpenMPSchedType ScheduleNum =
3357       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3358   auto *UpdatedLocation =
3359       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3360   auto *ThreadId = getThreadID(CGF, Loc);
3361   auto *StaticInitFunction =
3362       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3363   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3364                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3365                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3366 }
3367 
3368 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3369                                           SourceLocation Loc,
3370                                           OpenMPDirectiveKind DKind) {
3371   if (!CGF.HaveInsertPoint())
3372     return;
3373   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3374   llvm::Value *Args[] = {
3375       emitUpdateLocation(CGF, Loc,
3376                          isOpenMPDistributeDirective(DKind)
3377                              ? OMP_IDENT_WORK_DISTRIBUTE
3378                              : isOpenMPLoopDirective(DKind)
3379                                    ? OMP_IDENT_WORK_LOOP
3380                                    : OMP_IDENT_WORK_SECTIONS),
3381       getThreadID(CGF, Loc)};
3382   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3383                       Args);
3384 }
3385 
3386 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3387                                                  SourceLocation Loc,
3388                                                  unsigned IVSize,
3389                                                  bool IVSigned) {
3390   if (!CGF.HaveInsertPoint())
3391     return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3393   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3394   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3395 }
3396 
3397 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3398                                           SourceLocation Loc, unsigned IVSize,
3399                                           bool IVSigned, Address IL,
3400                                           Address LB, Address UB,
3401                                           Address ST) {
3402   // Call __kmpc_dispatch_next(
3403   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3404   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3405   //          kmp_int[32|64] *p_stride);
3406   llvm::Value *Args[] = {
3407       emitUpdateLocation(CGF, Loc),
3408       getThreadID(CGF, Loc),
3409       IL.getPointer(), // &isLastIter
3410       LB.getPointer(), // &Lower
3411       UB.getPointer(), // &Upper
3412       ST.getPointer()  // &Stride
3413   };
3414   llvm::Value *Call =
3415       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3416   return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/true),
3418       CGF.getContext().BoolTy, Loc);
3419 }
3420 
3421 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3422                                            llvm::Value *NumThreads,
3423                                            SourceLocation Loc) {
3424   if (!CGF.HaveInsertPoint())
3425     return;
3426   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3427   llvm::Value *Args[] = {
3428       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3429       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3430   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3431                       Args);
3432 }
3433 
3434 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3435                                          OpenMPProcBindClauseKind ProcBind,
3436                                          SourceLocation Loc) {
3437   if (!CGF.HaveInsertPoint())
3438     return;
3439   // Constants for proc bind value accepted by the runtime.
3440   enum ProcBindTy {
3441     ProcBindFalse = 0,
3442     ProcBindTrue,
3443     ProcBindMaster,
3444     ProcBindClose,
3445     ProcBindSpread,
3446     ProcBindIntel,
3447     ProcBindDefault
3448   } RuntimeProcBind;
3449   switch (ProcBind) {
3450   case OMPC_PROC_BIND_master:
3451     RuntimeProcBind = ProcBindMaster;
3452     break;
3453   case OMPC_PROC_BIND_close:
3454     RuntimeProcBind = ProcBindClose;
3455     break;
3456   case OMPC_PROC_BIND_spread:
3457     RuntimeProcBind = ProcBindSpread;
3458     break;
3459   case OMPC_PROC_BIND_unknown:
3460     llvm_unreachable("Unsupported proc_bind value.");
3461   }
3462   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3463   llvm::Value *Args[] = {
3464       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3465       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3466   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3467 }
3468 
3469 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3470                                 SourceLocation Loc) {
3471   if (!CGF.HaveInsertPoint())
3472     return;
3473   // Build call void __kmpc_flush(ident_t *loc)
3474   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3475                       emitUpdateLocation(CGF, Loc));
3476 }
3477 
3478 namespace {
3479 /// \brief Indexes of fields for type kmp_task_t.
3480 enum KmpTaskTFields {
3481   /// \brief List of shared variables.
3482   KmpTaskTShareds,
3483   /// \brief Task routine.
3484   KmpTaskTRoutine,
3485   /// \brief Partition id for the untied tasks.
3486   KmpTaskTPartId,
3487   /// Function with call of destructors for private variables.
3488   Data1,
3489   /// Task priority.
3490   Data2,
3491   /// (Taskloops only) Lower bound.
3492   KmpTaskTLowerBound,
3493   /// (Taskloops only) Upper bound.
3494   KmpTaskTUpperBound,
3495   /// (Taskloops only) Stride.
3496   KmpTaskTStride,
3497   /// (Taskloops only) Is last iteration flag.
3498   KmpTaskTLastIter,
3499   /// (Taskloops only) Reduction data.
3500   KmpTaskTReductions,
3501 };
3502 } // anonymous namespace
3503 
3504 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3505   return OffloadEntriesTargetRegion.empty() &&
3506          OffloadEntriesDeviceGlobalVar.empty();
3507 }
3508 
3509 /// \brief Initialize target region entry.
3510 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3511     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3512                                     StringRef ParentName, unsigned LineNum,
3513                                     unsigned Order) {
3514   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3515                                              "only required for the device "
3516                                              "code generation.");
3517   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3518       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3519                                    OMPTargetRegionEntryTargetRegion);
3520   ++OffloadingEntriesNum;
3521 }
3522 
3523 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3524     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3525                                   StringRef ParentName, unsigned LineNum,
3526                                   llvm::Constant *Addr, llvm::Constant *ID,
3527                                   OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // so it only has to be registered.
3530   if (CGM.getLangOpts().OpenMPIsDevice) {
3531     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3532            "Entry must exist.");
3533     auto &Entry =
3534         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3535     assert(Entry.isValid() && "Entry not initialized!");
3536     Entry.setAddress(Addr);
3537     Entry.setID(ID);
3538     Entry.setFlags(Flags);
3539   } else {
3540     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3541     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3542     ++OffloadingEntriesNum;
3543   }
3544 }
3545 
3546 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3547     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3548     unsigned LineNum) const {
3549   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3550   if (PerDevice == OffloadEntriesTargetRegion.end())
3551     return false;
3552   auto PerFile = PerDevice->second.find(FileID);
3553   if (PerFile == PerDevice->second.end())
3554     return false;
3555   auto PerParentName = PerFile->second.find(ParentName);
3556   if (PerParentName == PerFile->second.end())
3557     return false;
3558   auto PerLine = PerParentName->second.find(LineNum);
3559   if (PerLine == PerParentName->second.end())
3560     return false;
3561   // Fail if this entry is already registered.
3562   if (PerLine->second.getAddress() || PerLine->second.getID())
3563     return false;
3564   return true;
3565 }
3566 
3567 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3568     const OffloadTargetRegionEntryInfoActTy &Action) {
3569   // Scan all target region entries and perform the provided action.
3570   for (const auto &D : OffloadEntriesTargetRegion)
3571     for (const auto &F : D.second)
3572       for (const auto &P : F.second)
3573         for (const auto &L : P.second)
3574           Action(D.first, F.first, P.first(), L.first, L.second);
3575 }
3576 
3577 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3578     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3579                                        OMPTargetGlobalVarEntryKind Flags,
3580                                        unsigned Order) {
3581   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3582                                              "only required for the device "
3583                                              "code generation.");
3584   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3585   ++OffloadingEntriesNum;
3586 }
3587 
3588 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3589     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3590                                      CharUnits VarSize,
3591                                      OMPTargetGlobalVarEntryKind Flags,
3592                                      llvm::GlobalValue::LinkageTypes Linkage) {
3593   if (CGM.getLangOpts().OpenMPIsDevice) {
3594     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3595     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3596            "Entry not initialized!");
3597     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3598            "Resetting with the new address.");
3599     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
3600       return;
3601     Entry.setAddress(Addr);
3602     Entry.setVarSize(VarSize);
3603     Entry.setLinkage(Linkage);
3604   } else {
3605     if (hasDeviceGlobalVarEntryInfo(VarName))
3606       return;
3607     OffloadEntriesDeviceGlobalVar.try_emplace(
3608         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3609     ++OffloadingEntriesNum;
3610   }
3611 }
3612 
3613 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3614     actOnDeviceGlobalVarEntriesInfo(
3615         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3616   // Scan all device global variable entries and perform the provided action.
3617   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3618     Action(E.getKey(), E.getValue());
3619 }
3620 
3621 llvm::Function *
3622 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3623   // If we don't have entries or if we are emitting code for the device, we
3624   // don't need to do anything.
3625   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3626     return nullptr;
3627 
3628   auto &M = CGM.getModule();
3629   auto &C = CGM.getContext();
3630 
3631   // Get list of devices we care about
3632   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
3633 
3634   // We should be creating an offloading descriptor only if there are devices
3635   // specified.
3636   assert(!Devices.empty() && "No OpenMP offloading devices??");
3637 
3638   // Create the external variables that will point to the begin and end of the
3639   // host entries section. These will be defined by the linker.
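  //
  // For illustration only, these declarations look roughly like:
  //   @.omp_offloading.entries_begin = external constant
  //       %struct.__tgt_offload_entry
  //   @.omp_offloading.entries_end = external constant
  //       %struct.__tgt_offload_entry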
3640   auto *OffloadEntryTy =
3641       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3642   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
3643       M, OffloadEntryTy, /*isConstant=*/true,
3644       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3645       ".omp_offloading.entries_begin");
3646   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
3647       M, OffloadEntryTy, /*isConstant=*/true,
3648       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3649       ".omp_offloading.entries_end");
3650 
3651   // Create all device images
3652   auto *DeviceImageTy = cast<llvm::StructType>(
3653       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3654   ConstantInitBuilder DeviceImagesBuilder(CGM);
3655   auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
3656 
3657   for (llvm::Triple Device : Devices) {
3658     StringRef T = Device.getTriple();
3659     auto *ImgBegin = new llvm::GlobalVariable(
3660         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3661         /*Initializer=*/nullptr,
3662         Twine(".omp_offloading.img_start.") + Twine(T));
3663     auto *ImgEnd = new llvm::GlobalVariable(
3664         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3665         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
3666 
3667     auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
3668     Dev.add(ImgBegin);
3669     Dev.add(ImgEnd);
3670     Dev.add(HostEntriesBegin);
3671     Dev.add(HostEntriesEnd);
3672     Dev.finishAndAddTo(DeviceImagesEntries);
3673   }
3674 
3675   // Create device images global array.
3676   llvm::GlobalVariable *DeviceImages =
3677     DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
3678                                               CGM.getPointerAlign(),
3679                                               /*isConstant=*/true);
3680   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3681 
3682   // An all-zero index list used to build the constant GEP expression below.
3683   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3684                              llvm::Constant::getNullValue(CGM.Int32Ty)};
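  //
  // For illustration only, the constant GEP built with these indices below is
  // roughly:
  //   getelementptr ([<N> x %struct.__tgt_device_image],
  //       [<N> x %struct.__tgt_device_image]* @.omp_offloading.device_images,
  //       i32 0, i32 0)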
3685 
3686   // Create the target region descriptor.
3687   auto *BinaryDescriptorTy = cast<llvm::StructType>(
3688       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
3689   ConstantInitBuilder DescBuilder(CGM);
3690   auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
3691   DescInit.addInt(CGM.Int32Ty, Devices.size());
3692   DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3693                                                     DeviceImages,
3694                                                     Index));
3695   DescInit.add(HostEntriesBegin);
3696   DescInit.add(HostEntriesEnd);
3697 
3698   auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
3699                                               CGM.getPointerAlign(),
3700                                               /*isConstant=*/true);
3701 
3702   // Emit code to register or unregister the descriptor at execution
3703   // startup or closing, respectively.
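  //
  // Conceptually (a sketch only; the real bodies are emitted as IR below),
  // the helpers behave like:
  //
  //   void .omp_offloading.descriptor_unreg(void *) {
  //     __tgt_unregister_lib(&.omp_offloading.descriptor);
  //   }
  //   void .omp_offloading.descriptor_reg(void) {
  //     __tgt_register_lib(&.omp_offloading.descriptor);
  //     // Schedule the unregistration to run at program exit (e.g. via
  //     // __cxa_atexit on Itanium-like C++ ABIs).
  //   }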
3704 
3705   llvm::Function *UnRegFn;
3706   {
3707     FunctionArgList Args;
3708     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3709     Args.push_back(&DummyPtr);
3710 
3711     CodeGenFunction CGF(CGM);
3712     // Disable debug info for the global (de-)initializers because they are
3713     // not part of any particular construct.
3714     CGF.disableDebugInfo();
3715     auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3716     auto FTy = CGM.getTypes().GetFunctionType(FI);
3717     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(
3718         FTy, ".omp_offloading.descriptor_unreg", FI);
3719     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3720     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
3721                         Desc);
3722     CGF.FinishFunction();
3723   }
3724   llvm::Function *RegFn;
3725   {
3726     CodeGenFunction CGF(CGM);
3727     // Disable debug info for the global (de-)initializers because they are
3728     // not part of any particular construct.
3729     CGF.disableDebugInfo();
3730     auto &FI = CGM.getTypes().arrangeNullaryFunction();
3731     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3732     RegFn = CGM.CreateGlobalInitOrDestructFunction(
3733         FTy, ".omp_offloading.descriptor_reg", FI);
3734     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3735     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
3736     // Create a variable to drive the registration and unregistration of the
3737     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3738     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3739                                   SourceLocation(), nullptr, C.CharTy,
3740                                   ImplicitParamDecl::Other);
3741     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3742     CGF.FinishFunction();
3743   }
3744   if (CGM.supportsCOMDAT()) {
3745     // It is sufficient to call the registration function only once, so create
3746     // a COMDAT group for the registration/unregistration functions and the
3747     // associated data. This reduces startup time and code size. The
3748     // registration function serves as the COMDAT group key.
3749     auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
3750     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3751     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3752     RegFn->setComdat(ComdatKey);
3753     UnRegFn->setComdat(ComdatKey);
3754     DeviceImages->setComdat(ComdatKey);
3755     Desc->setComdat(ComdatKey);
3756   }
3757   return RegFn;
3758 }
3759 
3760 void CGOpenMPRuntime::createOffloadEntry(
3761     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3762     llvm::GlobalValue::LinkageTypes Linkage) {
3763   StringRef Name = Addr->getName();
3764   auto *TgtOffloadEntryType = cast<llvm::StructType>(
3765       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
3766   llvm::LLVMContext &C = CGM.getModule().getContext();
3767   llvm::Module &M = CGM.getModule();
3768 
3769   // Make sure the address has the right type.
3770   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
3771 
3772   // Create constant string with the name.
3773   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3774 
3775   llvm::GlobalVariable *Str =
3776       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
3777                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
3778                                ".omp_offloading.entry_name");
3779   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3780   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
3781 
3782   // We can't have any padding between symbols, so we need to have 1-byte
3783   // alignment.
3784   auto Align = CharUnits::fromQuantity(1);
3785 
3786   // Create the entry struct.
3787   ConstantInitBuilder EntryBuilder(CGM);
3788   auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
3789   EntryInit.add(AddrPtr);
3790   EntryInit.add(StrPtr);
3791   EntryInit.addInt(CGM.SizeTy, Size);
3792   EntryInit.addInt(CGM.Int32Ty, Flags);
3793   EntryInit.addInt(CGM.Int32Ty, 0);
3794   llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal(
3795       Twine(".omp_offloading.entry.", Name), Align,
3796       /*Constant=*/true, Linkage);
3797 
3798   // The entry must be created in the section the linker expects it to be in.
3799   Entry->setSection(".omp_offloading.entries");
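  //
  // For illustration only, on a 64-bit target the generated global looks
  // roughly like:
  //   @.omp_offloading.entry.<name> = <linkage> constant
  //       %struct.__tgt_offload_entry { i8* <addr>, i8* <name-str>,
  //       i64 <size>, i32 <flags>, i32 0 },
  //       section ".omp_offloading.entries", align 1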
3800 }
3801 
3802 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3803   // Emit the offloading entries and metadata so that the device codegen side
3804   // can easily figure out what to emit. The produced metadata looks like
3805   // this:
3806   //
3807   // !omp_offload.info = !{!1, ...}
3808   //
3809   // Metadata is generated both for functions that contain target regions
3810   // and for device global variables.
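  //
  // For illustration only, a target region entry and a device global variable
  // entry look roughly like:
  //
  //   !1 = !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>",
  //          i32 <line>, i32 <order>}
  //   !2 = !{i32 1, !"<mangled-name>", i32 <declare-target-kind>, i32 <order>}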
3811 
3812   // If we do not have entries, we don't need to do anything.
3813   if (OffloadEntriesInfoManager.empty())
3814     return;
3815 
3816   llvm::Module &M = CGM.getModule();
3817   llvm::LLVMContext &C = M.getContext();
3818   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
3819       OrderedEntries(OffloadEntriesInfoManager.size());
3820 
3821   // Auxiliary methods to create metadata values and strings.
3822   auto &&GetMDInt = [&C](unsigned V) {
3823     return llvm::ConstantAsMetadata::get(
3824         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), V));
3825   };
3826 
3827   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3828 
3829   // Create the offloading info metadata node.
3830   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3831 
3832   // Create a function that emits metadata for each target region entry.
3833   auto &&TargetRegionMetadataEmitter =
3834       [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
3835           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3836           unsigned Line,
3837           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3838         // Generate metadata for target regions. Each entry of this metadata
3839         // contains:
3840         // - Entry 0 -> Kind of this type of metadata (0).
3841         // - Entry 1 -> Device ID of the file where the entry was identified.
3842         // - Entry 2 -> File ID of the file where the entry was identified.
3843         // - Entry 3 -> Mangled name of the function where the entry was
3844         // identified.
3845         // - Entry 4 -> Line in the file where the entry was identified.
3846         // - Entry 5 -> Order the entry was created.
3847         // The first element of the metadata node is the kind.
3848         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3849                                  GetMDInt(FileID),      GetMDString(ParentName),
3850                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3851 
3852         // Save this entry in the right position of the ordered entries array.
3853         OrderedEntries[E.getOrder()] = &E;
3854 
3855         // Add metadata to the named metadata node.
3856         MD->addOperand(llvm::MDNode::get(C, Ops));
3857       };
3858 
3859   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3860       TargetRegionMetadataEmitter);
3861 
3862   // Create a function emitting metadata for each device global variable entry.
3863   auto &&DeviceGlobalVarMetadataEmitter =
3864       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3865        MD](StringRef MangledName,
3866            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3867                &E) {
3868         // Generate metadata for global variables. Each entry of this metadata
3869         // contains:
3870         // - Entry 0 -> Kind of this type of metadata (1).
3871         // - Entry 1 -> Mangled name of the variable.
3872         // - Entry 2 -> Declare target kind.
3873         // - Entry 3 -> Order the entry was created.
3874         // The first element of the metadata node is the kind.
3875         llvm::Metadata *Ops[] = {
3876             GetMDInt(E.getKind()), GetMDString(MangledName),
3877             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3878 
3879         // Save this entry in the right position of the ordered entries array.
3880         OrderedEntries[E.getOrder()] = &E;
3881 
3882         // Add metadata to the named metadata node.
3883         MD->addOperand(llvm::MDNode::get(C, Ops));
3884       };
3885 
3886   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3887       DeviceGlobalVarMetadataEmitter);
3888 
3889   for (const auto *E : OrderedEntries) {
3890     assert(E && "All ordered entries must exist!");
3891     if (const auto *CE =
3892             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3893                 E)) {
3894       assert(CE->getID() && CE->getAddress() &&
3895              "Entry ID and Addr are invalid!");
3896       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3897                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3898     } else if (const auto *CE =
3899                    dyn_cast<OffloadEntriesInfoManagerTy::
3900                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
3901       assert(CE->getAddress() && "Entry Addr is invalid!");
3902       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3903                          CE->getVarSize().getQuantity(), CE->getFlags(),
3904                          CE->getLinkage());
3905     } else {
3906       llvm_unreachable("Unsupported entry kind.");
3907     }
3908   }
3909 }
3910 
3911 /// \brief Loads all the offload entries information from the host IR
3912 /// metadata.
3913 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3914   // If we are in target mode, load the metadata from the host IR. This code has
3915   // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3916 
3917   if (!CGM.getLangOpts().OpenMPIsDevice)
3918     return;
3919 
3920   if (CGM.getLangOpts().OMPHostIRFile.empty())
3921     return;
3922 
3923   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3924   if (Buf.getError())
3925     return;
3926 
3927   llvm::LLVMContext C;
3928   auto ME = expectedToErrorOrAndEmitErrors(
3929       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3930 
3931   if (ME.getError())
3932     return;
3933 
3934   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3935   if (!MD)
3936     return;
3937 
3938   for (llvm::MDNode *MN : MD->operands()) {
3939     auto GetMDInt = [MN](unsigned Idx) {
3940       llvm::ConstantAsMetadata *V =
3941           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3942       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3943     };
3944 
3945     auto GetMDString = [MN](unsigned Idx) {
3946       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3947       return V->getString();
3948     };
3949 
3950     switch (GetMDInt(0)) {
3951     default:
3952       llvm_unreachable("Unexpected metadata!");
3953       break;
3954     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3955         OffloadingEntryInfoTargetRegion:
3956       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3957           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3958           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3959           /*Order=*/GetMDInt(5));
3960       break;
3961     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3962         OffloadingEntryInfoDeviceGlobalVar:
3963       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3964           /*MangledName=*/GetMDString(1),
3965           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3966               /*Flags=*/GetMDInt(2)),
3967           /*Order=*/GetMDInt(3));
3968       break;
3969     }
3970   }
3971 }
3972 
3973 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3974   if (!KmpRoutineEntryPtrTy) {
3975     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3976     auto &C = CGM.getContext();
3977     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3978     FunctionProtoType::ExtProtoInfo EPI;
3979     KmpRoutineEntryPtrQTy = C.getPointerType(
3980         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3981     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3982   }
3983 }
3984 
3985 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3986                                        QualType FieldTy) {
3987   auto *Field = FieldDecl::Create(
3988       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3989       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3990       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3991   Field->setAccess(AS_public);
3992   DC->addDecl(Field);
3993   return Field;
3994 }
3995 
3996 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3997 
3998   // Make sure the type of the entry is already created. This is the type we
3999   // have to create:
4000   // struct __tgt_offload_entry{
4001   //   void      *addr;       // Pointer to the offload entry info.
4002   //                          // (function or global)
4003   //   char      *name;       // Name of the function or global.
4004   //   size_t     size;       // Size of the entry info (0 if it is a function).
4005   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4006   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4007   // };
4008   if (TgtOffloadEntryQTy.isNull()) {
4009     ASTContext &C = CGM.getContext();
4010     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
4011     RD->startDefinition();
4012     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4013     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4014     addFieldToRecordDecl(C, RD, C.getSizeType());
4015     addFieldToRecordDecl(
4016         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4017     addFieldToRecordDecl(
4018         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4019     RD->completeDefinition();
4020     RD->addAttr(PackedAttr::CreateImplicit(C));
4021     TgtOffloadEntryQTy = C.getRecordType(RD);
4022   }
4023   return TgtOffloadEntryQTy;
4024 }
4025 
4026 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4027   // These are the types we need to build:
4028   // struct __tgt_device_image{
4029   // void   *ImageStart;       // Pointer to the target code start.
4030   // void   *ImageEnd;         // Pointer to the target code end.
4031   // // We also add the host entries to the device image, as it may be useful
4032   // // for the target runtime to have access to that information.
4033   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4034   //                                       // the entries.
4035   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4036   //                                       // entries (non inclusive).
4037   // };
4038   if (TgtDeviceImageQTy.isNull()) {
4039     ASTContext &C = CGM.getContext();
4040     auto *RD = C.buildImplicitRecord("__tgt_device_image");
4041     RD->startDefinition();
4042     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4043     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4044     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4045     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4046     RD->completeDefinition();
4047     TgtDeviceImageQTy = C.getRecordType(RD);
4048   }
4049   return TgtDeviceImageQTy;
4050 }
4051 
4052 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4053   // struct __tgt_bin_desc{
4054   //   int32_t              NumDevices;      // Number of devices supported.
4055   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4056   //                                         // (one per device).
4057   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4058   //                                         // entries.
4059   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4060   //                                         // entries (non inclusive).
4061   // };
4062   if (TgtBinaryDescriptorQTy.isNull()) {
4063     ASTContext &C = CGM.getContext();
4064     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
4065     RD->startDefinition();
4066     addFieldToRecordDecl(
4067         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4068     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4069     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4070     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4071     RD->completeDefinition();
4072     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4073   }
4074   return TgtBinaryDescriptorQTy;
4075 }
4076 
4077 namespace {
4078 struct PrivateHelpersTy {
4079   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4080                    const VarDecl *PrivateElemInit)
4081       : Original(Original), PrivateCopy(PrivateCopy),
4082         PrivateElemInit(PrivateElemInit) {}
4083   const VarDecl *Original;
4084   const VarDecl *PrivateCopy;
4085   const VarDecl *PrivateElemInit;
4086 };
4087 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4088 } // anonymous namespace
4089 
4090 static RecordDecl *
4091 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4092   if (!Privates.empty()) {
4093     auto &C = CGM.getContext();
4094     // Build struct .kmp_privates_t. {
4095     //         /*  private vars  */
4096     //       };
4097     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
4098     RD->startDefinition();
4099     for (auto &&Pair : Privates) {
4100       auto *VD = Pair.second.Original;
4101       auto Type = VD->getType();
4102       Type = Type.getNonReferenceType();
4103       auto *FD = addFieldToRecordDecl(C, RD, Type);
4104       if (VD->hasAttrs()) {
4105         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4106              E(VD->getAttrs().end());
4107              I != E; ++I)
4108           FD->addAttr(*I);
4109       }
4110     }
4111     RD->completeDefinition();
4112     return RD;
4113   }
4114   return nullptr;
4115 }
4116 
4117 static RecordDecl *
4118 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4119                          QualType KmpInt32Ty,
4120                          QualType KmpRoutineEntryPointerQTy) {
4121   auto &C = CGM.getContext();
4122   // Build struct kmp_task_t {
4123   //         void *              shareds;
4124   //         kmp_routine_entry_t routine;
4125   //         kmp_int32           part_id;
4126   //         kmp_cmplrdata_t data1;
4127   //         kmp_cmplrdata_t data2;
4128   // For taskloops, additional fields are:
4129   //         kmp_uint64          lb;
4130   //         kmp_uint64          ub;
4131   //         kmp_int64           st;
4132   //         kmp_int32           liter;
4133   //         void *              reductions;
4134   //       };
4135   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4136   UD->startDefinition();
4137   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4138   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4139   UD->completeDefinition();
4140   QualType KmpCmplrdataTy = C.getRecordType(UD);
4141   auto *RD = C.buildImplicitRecord("kmp_task_t");
4142   RD->startDefinition();
4143   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4144   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4145   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4146   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4147   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4148   if (isOpenMPTaskLoopDirective(Kind)) {
4149     QualType KmpUInt64Ty =
4150         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4151     QualType KmpInt64Ty =
4152         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4153     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4154     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4155     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4156     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4157     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4158   }
4159   RD->completeDefinition();
4160   return RD;
4161 }
4162 
4163 static RecordDecl *
4164 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4165                                      ArrayRef<PrivateDataTy> Privates) {
4166   auto &C = CGM.getContext();
4167   // Build struct kmp_task_t_with_privates {
4168   //         kmp_task_t task_data;
4169   //         .kmp_privates_t. privates;
4170   //       };
4171   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4172   RD->startDefinition();
4173   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4174   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
4175     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4176   }
4177   RD->completeDefinition();
4178   return RD;
4179 }
4180 
4181 /// \brief Emit a proxy function which accepts kmp_task_t as the second
4182 /// argument.
4183 /// \code
4184 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4185 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4186 ///   For taskloops:
4187 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4188 ///   tt->reductions, tt->shareds);
4189 ///   return 0;
4190 /// }
4191 /// \endcode
4192 static llvm::Value *
4193 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4194                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4195                       QualType KmpTaskTWithPrivatesPtrQTy,
4196                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4197                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
4198                       llvm::Value *TaskPrivatesMap) {
4199   auto &C = CGM.getContext();
4200   FunctionArgList Args;
4201   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4202                             ImplicitParamDecl::Other);
4203   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4204                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4205                                 ImplicitParamDecl::Other);
4206   Args.push_back(&GtidArg);
4207   Args.push_back(&TaskTypeArg);
4208   auto &TaskEntryFnInfo =
4209       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4210   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4211   auto *TaskEntry =
4212       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
4213                              ".omp_task_entry.", &CGM.getModule());
4214   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4215   TaskEntry->setDoesNotRecurse();
4216   CodeGenFunction CGF(CGM);
4217   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4218                     Loc, Loc);
4219 
4220   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4221   // tt,
4222   // For taskloops:
4223   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4224   // tt->task_data.shareds);
4225   auto *GtidParam = CGF.EmitLoadOfScalar(
4226       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4227   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4228       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4229       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4230   auto *KmpTaskTWithPrivatesQTyRD =
4231       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4232   LValue Base =
4233       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4234   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4235   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4236   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4237   auto *PartidParam = PartIdLVal.getPointer();
4238 
4239   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4240   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4241   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4242       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4243       CGF.ConvertTypeForMem(SharedsPtrTy));
4244 
4245   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4246   llvm::Value *PrivatesParam;
4247   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4248     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4249     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4250         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4251   } else
4252     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4253 
4254   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4255                                TaskPrivatesMap,
4256                                CGF.Builder
4257                                    .CreatePointerBitCastOrAddrSpaceCast(
4258                                        TDBase.getAddress(), CGF.VoidPtrTy)
4259                                    .getPointer()};
4260   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4261                                           std::end(CommonArgs));
4262   if (isOpenMPTaskLoopDirective(Kind)) {
4263     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4264     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4265     auto *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4266     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4267     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4268     auto *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4269     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4270     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
4271     auto *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4272     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4273     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
4274     auto *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4275     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4276     auto RLVal = CGF.EmitLValueForField(Base, *RFI);
4277     auto *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4278     CallArgs.push_back(LBParam);
4279     CallArgs.push_back(UBParam);
4280     CallArgs.push_back(StParam);
4281     CallArgs.push_back(LIParam);
4282     CallArgs.push_back(RParam);
4283   }
4284   CallArgs.push_back(SharedsParam);
4285 
4286   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4287                                                   CallArgs);
4288   CGF.EmitStoreThroughLValue(
4289       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4290       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4291   CGF.FinishFunction();
4292   return TaskEntry;
4293 }
4294 
4295 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4296                                             SourceLocation Loc,
4297                                             QualType KmpInt32Ty,
4298                                             QualType KmpTaskTWithPrivatesPtrQTy,
4299                                             QualType KmpTaskTWithPrivatesQTy) {
4300   auto &C = CGM.getContext();
4301   FunctionArgList Args;
4302   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4303                             ImplicitParamDecl::Other);
4304   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4305                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4306                                 ImplicitParamDecl::Other);
4307   Args.push_back(&GtidArg);
4308   Args.push_back(&TaskTypeArg);
4309   auto &DestructorFnInfo =
4310       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4311   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
4312   auto *DestructorFn =
4313       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4314                              ".omp_task_destructor.", &CGM.getModule());
4315   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4316                                     DestructorFnInfo);
4317   DestructorFn->setDoesNotRecurse();
4318   CodeGenFunction CGF(CGM);
4319   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4320                     Args, Loc, Loc);
4321 
4322   LValue Base = CGF.EmitLoadOfPointerLValue(
4323       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4324       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4325   auto *KmpTaskTWithPrivatesQTyRD =
4326       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4327   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4328   Base = CGF.EmitLValueForField(Base, *FI);
4329   for (auto *Field :
4330        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4331     if (auto DtorKind = Field->getType().isDestructedType()) {
4332       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
4333       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4334     }
4335   }
4336   CGF.FinishFunction();
4337   return DestructorFn;
4338 }
4339 
4340 /// \brief Emit a privates mapping function for correct handling of private and
4341 /// firstprivate variables.
4342 /// \code
4343 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4344 /// **noalias priv1,...,  <tyn> **noalias privn) {
4345 ///   *priv1 = &.privates.priv1;
4346 ///   ...;
4347 ///   *privn = &.privates.privn;
4348 /// }
4349 /// \endcode
4350 static llvm::Value *
4351 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4352                                ArrayRef<const Expr *> PrivateVars,
4353                                ArrayRef<const Expr *> FirstprivateVars,
4354                                ArrayRef<const Expr *> LastprivateVars,
4355                                QualType PrivatesQTy,
4356                                ArrayRef<PrivateDataTy> Privates) {
4357   auto &C = CGM.getContext();
4358   FunctionArgList Args;
4359   ImplicitParamDecl TaskPrivatesArg(
4360       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4361       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4362       ImplicitParamDecl::Other);
4363   Args.push_back(&TaskPrivatesArg);
4364   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4365   unsigned Counter = 1;
4366   for (auto *E: PrivateVars) {
4367     Args.push_back(ImplicitParamDecl::Create(
4368         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4369         C.getPointerType(C.getPointerType(E->getType()))
4370             .withConst()
4371             .withRestrict(),
4372         ImplicitParamDecl::Other));
4373     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4374     PrivateVarsPos[VD] = Counter;
4375     ++Counter;
4376   }
4377   for (auto *E : FirstprivateVars) {
4378     Args.push_back(ImplicitParamDecl::Create(
4379         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4380         C.getPointerType(C.getPointerType(E->getType()))
4381             .withConst()
4382             .withRestrict(),
4383         ImplicitParamDecl::Other));
4384     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4385     PrivateVarsPos[VD] = Counter;
4386     ++Counter;
4387   }
4388   for (auto *E: LastprivateVars) {
4389     Args.push_back(ImplicitParamDecl::Create(
4390         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4391         C.getPointerType(C.getPointerType(E->getType()))
4392             .withConst()
4393             .withRestrict(),
4394         ImplicitParamDecl::Other));
4395     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4396     PrivateVarsPos[VD] = Counter;
4397     ++Counter;
4398   }
4399   auto &TaskPrivatesMapFnInfo =
4400       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4401   auto *TaskPrivatesMapTy =
4402       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4403   auto *TaskPrivatesMap = llvm::Function::Create(
4404       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
4405       ".omp_task_privates_map.", &CGM.getModule());
4406   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4407                                     TaskPrivatesMapFnInfo);
4408   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4409   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4410   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4411   CodeGenFunction CGF(CGM);
4412   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4413                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4414 
4415   // *privi = &.privates.privi;
4416   LValue Base = CGF.EmitLoadOfPointerLValue(
4417       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4418       TaskPrivatesArg.getType()->castAs<PointerType>());
4419   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4420   Counter = 0;
4421   for (auto *Field : PrivatesQTyRD->fields()) {
4422     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
4423     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4424     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4425     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4426         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4427     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4428     ++Counter;
4429   }
4430   CGF.FinishFunction();
4431   return TaskPrivatesMap;
4432 }
4433 
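/// Comparator used to sort task private copies by decreasing alignment, so
/// that the most strictly aligned copies come first and padding in the
/// generated .kmp_privates.t record is minimized.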
4434 static bool stable_sort_comparator(const PrivateDataTy &P1,
4435                                    const PrivateDataTy &P2) {
4436   return P1.first > P2.first;
4437 }
4438 
4439 /// Emit initialization for private variables in task-based directives.
4440 static void emitPrivatesInit(CodeGenFunction &CGF,
4441                              const OMPExecutableDirective &D,
4442                              Address KmpTaskSharedsPtr, LValue TDBase,
4443                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4444                              QualType SharedsTy, QualType SharedsPtrTy,
4445                              const OMPTaskDataTy &Data,
4446                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4447   auto &C = CGF.getContext();
4448   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4449   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4450   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4451                                  ? OMPD_taskloop
4452                                  : OMPD_task;
4453   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4454   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4455   LValue SrcBase;
4456   bool IsTargetTask =
4457       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4458       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4459   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4460   // PointersArray and SizesArray. The original variables for these arrays are
4461   // not captured and we get their addresses explicitly.
4462   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4463       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4464     SrcBase = CGF.MakeAddrLValue(
4465         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4466             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4467         SharedsTy);
4468   }
4469   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4470   for (auto &&Pair : Privates) {
4471     auto *VD = Pair.second.PrivateCopy;
4472     auto *Init = VD->getAnyInitializer();
4473     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4474                              !CGF.isTrivialInitializer(Init)))) {
4475       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4476       if (auto *Elem = Pair.second.PrivateElemInit) {
4477         auto *OriginalVD = Pair.second.Original;
4478         // Check if the variable is the target-based BasePointersArray,
4479         // PointersArray or SizesArray.
4480         LValue SharedRefLValue;
4481         QualType Type = OriginalVD->getType();
4482         auto *SharedField = CapturesInfo.lookup(OriginalVD);
4483         if (IsTargetTask && !SharedField) {
4484           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4485                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4486                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4487                          ->getNumParams() == 0 &&
4488                  isa<TranslationUnitDecl>(
4489                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4490                          ->getDeclContext()) &&
4491                  "Expected artificial target data variable.");
4492           SharedRefLValue =
4493               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4494         } else {
4495           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4496           SharedRefLValue = CGF.MakeAddrLValue(
4497               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4498               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4499               SharedRefLValue.getTBAAInfo());
4500         }
4501         if (Type->isArrayType()) {
4502           // Initialize firstprivate array.
4503           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4504             // Perform simple memcpy.
4505             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4506           } else {
4507             // Initialize firstprivate array using element-by-element
4508             // initialization.
4509             CGF.EmitOMPAggregateAssign(
4510                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4511                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4512                                                   Address SrcElement) {
4513                   // Clean up any temporaries needed by the initialization.
4514                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4515                   InitScope.addPrivate(
4516                       Elem, [SrcElement]() -> Address { return SrcElement; });
4517                   (void)InitScope.Privatize();
4518                   // Emit initialization for single element.
4519                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4520                       CGF, &CapturesInfo);
4521                   CGF.EmitAnyExprToMem(Init, DestElement,
4522                                        Init->getType().getQualifiers(),
4523                                        /*IsInitializer=*/false);
4524                 });
4525           }
4526         } else {
4527           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4528           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4529             return SharedRefLValue.getAddress();
4530           });
4531           (void)InitScope.Privatize();
4532           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4533           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4534                              /*capturedByInit=*/false);
4535         }
4536       } else
4537         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4538     }
4539     ++FI;
4540   }
4541 }
4542 
4543 /// Check if a duplication function is required for taskloops.
4544 static bool checkInitIsRequired(CodeGenFunction &CGF,
4545                                 ArrayRef<PrivateDataTy> Privates) {
4546   bool InitRequired = false;
4547   for (auto &&Pair : Privates) {
4548     auto *VD = Pair.second.PrivateCopy;
4549     auto *Init = VD->getAnyInitializer();
4550     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4551                                     !CGF.isTrivialInitializer(Init));
4552   }
4553   return InitRequired;
4554 }
4555 
4557 /// Emit task_dup function (for initialization of
4558 /// private/firstprivate/lastprivate vars and last_iter flag)
4559 /// \code
4560 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4561 /// lastpriv) {
4562 /// // setup lastprivate flag
4563 ///    task_dst->last = lastpriv;
4564 /// // could be constructor calls here...
4565 /// }
4566 /// \endcode
4567 static llvm::Value *
4568 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4569                     const OMPExecutableDirective &D,
4570                     QualType KmpTaskTWithPrivatesPtrQTy,
4571                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4572                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4573                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4574                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4575   auto &C = CGM.getContext();
4576   FunctionArgList Args;
4577   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4578                            KmpTaskTWithPrivatesPtrQTy,
4579                            ImplicitParamDecl::Other);
4580   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4581                            KmpTaskTWithPrivatesPtrQTy,
4582                            ImplicitParamDecl::Other);
4583   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4584                                 ImplicitParamDecl::Other);
4585   Args.push_back(&DstArg);
4586   Args.push_back(&SrcArg);
4587   Args.push_back(&LastprivArg);
4588   auto &TaskDupFnInfo =
4589       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4590   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4591   auto *TaskDup =
4592       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
4593                              ".omp_task_dup.", &CGM.getModule());
4594   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4595   TaskDup->setDoesNotRecurse();
4596   CodeGenFunction CGF(CGM);
4597   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4598                     Loc);
4599 
4600   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4601       CGF.GetAddrOfLocalVar(&DstArg),
4602       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4603   // task_dst->liter = lastpriv;
4604   if (WithLastIter) {
4605     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4606     LValue Base = CGF.EmitLValueForField(
4607         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4608     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4609     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4610         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4611     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4612   }
4613 
4614   // Emit initial values for private copies (if any).
4615   assert(!Privates.empty());
4616   Address KmpTaskSharedsPtr = Address::invalid();
4617   if (!Data.FirstprivateVars.empty()) {
4618     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4619         CGF.GetAddrOfLocalVar(&SrcArg),
4620         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4621     LValue Base = CGF.EmitLValueForField(
4622         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4623     KmpTaskSharedsPtr = Address(
4624         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4625                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4626                                                   KmpTaskTShareds)),
4627                              Loc),
4628         CGF.getNaturalTypeAlignment(SharedsTy));
4629   }
4630   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4631                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4632   CGF.FinishFunction();
4633   return TaskDup;
4634 }
4635 
4636 /// Checks if destructor function is required to be generated.
4637 /// \return true if cleanups are required, false otherwise.
4638 static bool
4639 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4640   bool NeedsCleanup = false;
4641   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4642   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4643   for (auto *FD : PrivateRD->fields()) {
4644     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4645     if (NeedsCleanup)
4646       break;
4647   }
4648   return NeedsCleanup;
4649 }
4650 
4651 CGOpenMPRuntime::TaskResultTy
4652 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4653                               const OMPExecutableDirective &D,
4654                               llvm::Value *TaskFunction, QualType SharedsTy,
4655                               Address Shareds, const OMPTaskDataTy &Data) {
4656   auto &C = CGM.getContext();
4657   llvm::SmallVector<PrivateDataTy, 4> Privates;
4658   // Aggregate privates and sort them by alignment (in decreasing order).
4659   auto I = Data.PrivateCopies.begin();
4660   for (auto *E : Data.PrivateVars) {
4661     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4662     Privates.push_back(std::make_pair(
4663         C.getDeclAlign(VD),
4664         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4665                          /*PrivateElemInit=*/nullptr)));
4666     ++I;
4667   }
4668   I = Data.FirstprivateCopies.begin();
4669   auto IElemInitRef = Data.FirstprivateInits.begin();
4670   for (auto *E : Data.FirstprivateVars) {
4671     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4672     Privates.push_back(std::make_pair(
4673         C.getDeclAlign(VD),
4674         PrivateHelpersTy(
4675             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4676             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
4677     ++I;
4678     ++IElemInitRef;
4679   }
4680   I = Data.LastprivateCopies.begin();
4681   for (auto *E : Data.LastprivateVars) {
4682     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4683     Privates.push_back(std::make_pair(
4684         C.getDeclAlign(VD),
4685         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4686                          /*PrivateElemInit=*/nullptr)));
4687     ++I;
4688   }
4689   std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
4690   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4691   // Build type kmp_routine_entry_t (if not built yet).
4692   emitKmpRoutineEntryT(KmpInt32Ty);
4693   // Build type kmp_task_t (if not built yet).
4694   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4695     if (SavedKmpTaskloopTQTy.isNull()) {
4696       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4697           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4698     }
4699     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4700   } else {
4701     assert((D.getDirectiveKind() == OMPD_task ||
4702             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4703             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4704            "Expected taskloop, task or target directive");
4705     if (SavedKmpTaskTQTy.isNull()) {
4706       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4707           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4708     }
4709     KmpTaskTQTy = SavedKmpTaskTQTy;
4710   }
4711   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4712   // Build particular struct kmp_task_t for the given task.
4713   auto *KmpTaskTWithPrivatesQTyRD =
4714       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4715   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4716   QualType KmpTaskTWithPrivatesPtrQTy =
4717       C.getPointerType(KmpTaskTWithPrivatesQTy);
4718   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4719   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
4720   auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4721   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4722 
4723   // Emit initial values for private copies (if any).
4724   llvm::Value *TaskPrivatesMap = nullptr;
4725   auto *TaskPrivatesMapTy =
4726       std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4727   if (!Privates.empty()) {
4728     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4729     TaskPrivatesMap = emitTaskPrivateMappingFunction(
4730         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4731         FI->getType(), Privates);
4732     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4733         TaskPrivatesMap, TaskPrivatesMapTy);
4734   } else {
4735     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4736         cast<llvm::PointerType>(TaskPrivatesMapTy));
4737   }
4738   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4739   // kmp_task_t *tt);
4740   auto *TaskEntry = emitProxyTaskFunction(
4741       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4742       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4743       TaskPrivatesMap);
4744 
4745   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4746   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4747   // kmp_routine_entry_t *task_entry);
4748   // Task flags. Format is taken from
4749   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4750   // description of kmp_tasking_flags struct.
4751   enum {
4752     TiedFlag = 0x1,
4753     FinalFlag = 0x2,
4754     DestructorsFlag = 0x8,
4755     PriorityFlag = 0x20
4756   };
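       // Illustrative sketch only (not emitted verbatim): for a tied task whose
       // privates need destructors and that has a priority clause, the flags
       // computed below combine to TiedFlag | DestructorsFlag | PriorityFlag,
       // i.e. 0x1 | 0x8 | 0x20 == 0x29. The FinalFlag bit is OR-ed in
       // separately from the value of the 'final' clause.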
4757   unsigned Flags = Data.Tied ? TiedFlag : 0;
4758   bool NeedsCleanup = false;
4759   if (!Privates.empty()) {
4760     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4761     if (NeedsCleanup)
4762       Flags = Flags | DestructorsFlag;
4763   }
4764   if (Data.Priority.getInt())
4765     Flags = Flags | PriorityFlag;
4766   auto *TaskFlags =
4767       Data.Final.getPointer()
4768           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4769                                      CGF.Builder.getInt32(FinalFlag),
4770                                      CGF.Builder.getInt32(/*C=*/0))
4771           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4772   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4773   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4774   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4775                               getThreadID(CGF, Loc), TaskFlags,
4776                               KmpTaskTWithPrivatesTySize, SharedsSize,
4777                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4778                                   TaskEntry, KmpRoutineEntryPtrTy)};
4779   auto *NewTask = CGF.EmitRuntimeCall(
4780       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
4781   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4782       NewTask, KmpTaskTWithPrivatesPtrTy);
4783   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4784                                                KmpTaskTWithPrivatesQTy);
4785   LValue TDBase =
4786       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4787   // Fill the data in the resulting kmp_task_t record.
4788   // Copy shareds if there are any.
4789   Address KmpTaskSharedsPtr = Address::invalid();
4790   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4791     KmpTaskSharedsPtr =
4792         Address(CGF.EmitLoadOfScalar(
4793                     CGF.EmitLValueForField(
4794                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4795                                            KmpTaskTShareds)),
4796                     Loc),
4797                 CGF.getNaturalTypeAlignment(SharedsTy));
4798     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4799     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4800     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4801   }
4802   // Emit initial values for private copies (if any).
4803   TaskResultTy Result;
4804   if (!Privates.empty()) {
4805     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4806                      SharedsTy, SharedsPtrTy, Data, Privates,
4807                      /*ForDup=*/false);
4808     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4809         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4810       Result.TaskDupFn = emitTaskDupFunction(
4811           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4812           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4813           /*WithLastIter=*/!Data.LastprivateVars.empty());
4814     }
4815   }
4816   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4817   enum { Priority = 0, Destructors = 1 };
4818   // Provide pointer to function with destructors for privates.
4819   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4820   auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
4821   if (NeedsCleanup) {
4822     llvm::Value *DestructorFn = emitDestructorsFunction(
4823         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4824         KmpTaskTWithPrivatesQTy);
4825     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4826     LValue DestructorsLV = CGF.EmitLValueForField(
4827         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4828     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4829                               DestructorFn, KmpRoutineEntryPtrTy),
4830                           DestructorsLV);
4831   }
4832   // Set priority.
4833   if (Data.Priority.getInt()) {
4834     LValue Data2LV = CGF.EmitLValueForField(
4835         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4836     LValue PriorityLV = CGF.EmitLValueForField(
4837         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4838     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4839   }
4840   Result.NewTask = NewTask;
4841   Result.TaskEntry = TaskEntry;
4842   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4843   Result.TDBase = TDBase;
4844   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4845   return Result;
4846 }
4847 
4848 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4849                                    const OMPExecutableDirective &D,
4850                                    llvm::Value *TaskFunction,
4851                                    QualType SharedsTy, Address Shareds,
4852                                    const Expr *IfCond,
4853                                    const OMPTaskDataTy &Data) {
4854   if (!CGF.HaveInsertPoint())
4855     return;
4856 
4857   TaskResultTy Result =
4858       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4859   llvm::Value *NewTask = Result.NewTask;
4860   llvm::Value *TaskEntry = Result.TaskEntry;
4861   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4862   LValue TDBase = Result.TDBase;
4863   RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4864   auto &C = CGM.getContext();
4865   // Process list of dependences.
4866   Address DependenciesArray = Address::invalid();
4867   unsigned NumDependencies = Data.Dependences.size();
4868   if (NumDependencies) {
4869     // Dependence kind for RTL.
4870     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
4871     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
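         // For illustration only: with the mapping in the switch below,
         // 'depend(in: x)' produces deps[i].flags == DepIn (0x1), while
         // 'depend(out: x)' and 'depend(inout: x)' both produce
         // deps[i].flags == DepInOut (0x3).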
4872     RecordDecl *KmpDependInfoRD;
4873     QualType FlagsTy =
4874         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4875     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4876     if (KmpDependInfoTy.isNull()) {
4877       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4878       KmpDependInfoRD->startDefinition();
4879       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4880       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4881       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4882       KmpDependInfoRD->completeDefinition();
4883       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4884     } else
4885       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4886     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
4887     // Define type kmp_depend_info[<Dependences.size()>];
4888     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4889         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
4890         ArrayType::Normal, /*IndexTypeQuals=*/0);
4891     // kmp_depend_info[<Dependences.size()>] deps;
4892     DependenciesArray =
4893         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4894     for (unsigned i = 0; i < NumDependencies; ++i) {
4895       const Expr *E = Data.Dependences[i].second;
4896       auto Addr = CGF.EmitLValue(E);
4897       llvm::Value *Size;
4898       QualType Ty = E->getType();
4899       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4900         LValue UpAddrLVal =
4901             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
4902         llvm::Value *UpAddr =
4903             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
4904         llvm::Value *LowIntPtr =
4905             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
4906         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
4907         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4908       } else
4909         Size = CGF.getTypeSize(Ty);
4910       auto Base = CGF.MakeAddrLValue(
4911           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
4912           KmpDependInfoTy);
4913       // deps[i].base_addr = &<Dependences[i].second>;
4914       auto BaseAddrLVal = CGF.EmitLValueForField(
4915           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4916       CGF.EmitStoreOfScalar(
4917           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
4918           BaseAddrLVal);
4919       // deps[i].len = sizeof(<Dependences[i].second>);
4920       auto LenLVal = CGF.EmitLValueForField(
4921           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4922       CGF.EmitStoreOfScalar(Size, LenLVal);
4923       // deps[i].flags = <Dependences[i].first>;
4924       RTLDependenceKindTy DepKind;
4925       switch (Data.Dependences[i].first) {
4926       case OMPC_DEPEND_in:
4927         DepKind = DepIn;
4928         break;
4929       // Out and InOut dependencies must use the same code.
4930       case OMPC_DEPEND_out:
4931       case OMPC_DEPEND_inout:
4932         DepKind = DepInOut;
4933         break;
4934       case OMPC_DEPEND_source:
4935       case OMPC_DEPEND_sink:
4936       case OMPC_DEPEND_unknown:
4937         llvm_unreachable("Unknown task dependence type");
4938       }
4939       auto FlagsLVal = CGF.EmitLValueForField(
4940           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4941       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4942                             FlagsLVal);
4943     }
4944     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4945         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
4946         CGF.VoidPtrTy);
4947   }
4948 
4949   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4950   // libcall.
4951   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4952   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4953   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4954   // list is not empty
4955   auto *ThreadID = getThreadID(CGF, Loc);
4956   auto *UpLoc = emitUpdateLocation(CGF, Loc);
4957   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4958   llvm::Value *DepTaskArgs[7];
4959   if (NumDependencies) {
4960     DepTaskArgs[0] = UpLoc;
4961     DepTaskArgs[1] = ThreadID;
4962     DepTaskArgs[2] = NewTask;
4963     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
4964     DepTaskArgs[4] = DependenciesArray.getPointer();
4965     DepTaskArgs[5] = CGF.Builder.getInt32(0);
4966     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4967   }
4968   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
4969                         &TaskArgs,
4970                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4971     if (!Data.Tied) {
4972       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4973       auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4974       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4975     }
4976     if (NumDependencies) {
4977       CGF.EmitRuntimeCall(
4978           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
4979     } else {
4980       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4981                           TaskArgs);
4982     }
4983     // Check if the parent region is untied and build a return for the untied task.
4984     if (auto *Region =
4985             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4986       Region->emitUntiedSwitch(CGF);
4987   };
4988 
4989   llvm::Value *DepWaitTaskArgs[6];
4990   if (NumDependencies) {
4991     DepWaitTaskArgs[0] = UpLoc;
4992     DepWaitTaskArgs[1] = ThreadID;
4993     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4994     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4995     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4996     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4997   }
4998   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4999                         NumDependencies, &DepWaitTaskArgs,
5000                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5001     auto &RT = CGF.CGM.getOpenMPRuntime();
5002     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5003     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5004     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5005     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5006     // is specified.
5007     if (NumDependencies)
5008       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5009                           DepWaitTaskArgs);
5010     // Call proxy_task_entry(gtid, new_task);
5011     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5012                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5013       Action.Enter(CGF);
5014       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5015       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5016                                                           OutlinedFnArgs);
5017     };
5018 
5019     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5020     // kmp_task_t *new_task);
5021     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5022     // kmp_task_t *new_task);
5023     RegionCodeGenTy RCG(CodeGen);
5024     CommonActionTy Action(
5025         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5026         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5027     RCG.setAction(Action);
5028     RCG(CGF);
5029   };
5030 
5031   if (IfCond)
5032     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5033   else {
5034     RegionCodeGenTy ThenRCG(ThenCodeGen);
5035     ThenRCG(CGF);
5036   }
5037 }
5038 
5039 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5040                                        const OMPLoopDirective &D,
5041                                        llvm::Value *TaskFunction,
5042                                        QualType SharedsTy, Address Shareds,
5043                                        const Expr *IfCond,
5044                                        const OMPTaskDataTy &Data) {
5045   if (!CGF.HaveInsertPoint())
5046     return;
5047   TaskResultTy Result =
5048       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5049   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5050   // libcall.
5051   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5052   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5053   // sched, kmp_uint64 grainsize, void *task_dup);
5054   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5055   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5056   llvm::Value *IfVal;
5057   if (IfCond) {
5058     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5059                                       /*isSigned=*/true);
5060   } else
5061     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5062 
5063   LValue LBLVal = CGF.EmitLValueForField(
5064       Result.TDBase,
5065       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5066   auto *LBVar =
5067       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5068   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5069                        /*IsInitializer=*/true);
5070   LValue UBLVal = CGF.EmitLValueForField(
5071       Result.TDBase,
5072       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5073   auto *UBVar =
5074       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5075   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5076                        /*IsInitializer=*/true);
5077   LValue StLVal = CGF.EmitLValueForField(
5078       Result.TDBase,
5079       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5080   auto *StVar =
5081       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5082   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5083                        /*IsInitializer=*/true);
5084   // Store reductions address.
5085   LValue RedLVal = CGF.EmitLValueForField(
5086       Result.TDBase,
5087       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5088   if (Data.Reductions)
5089     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5090   else {
5091     CGF.EmitNullInitialization(RedLVal.getAddress(),
5092                                CGF.getContext().VoidPtrTy);
5093   }
5094   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
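       // Illustrative examples only (assuming the encoding above): a
       // 'grainsize(4)' clause yields sched == Grainsize (1) with a grainsize
       // argument of 4, a 'num_tasks(8)' clause yields sched == NumTasks (2)
       // with an argument of 8, and no clause yields sched == NoSchedule (0)
       // with an argument of 0.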
5095   llvm::Value *TaskArgs[] = {
5096       UpLoc,
5097       ThreadID,
5098       Result.NewTask,
5099       IfVal,
5100       LBLVal.getPointer(),
5101       UBLVal.getPointer(),
5102       CGF.EmitLoadOfScalar(StLVal, Loc),
5103       llvm::ConstantInt::getNullValue(
5104           CGF.IntTy), // Always 0 because the taskgroup is emitted by the compiler
5105       llvm::ConstantInt::getSigned(
5106           CGF.IntTy, Data.Schedule.getPointer()
5107                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5108                          : NoSchedule),
5109       Data.Schedule.getPointer()
5110           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5111                                       /*isSigned=*/false)
5112           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5113       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5114                              Result.TaskDupFn, CGF.VoidPtrTy)
5115                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5116   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5117 }
5118 
5119 /// \brief Emit the reduction operation for each element of an array
5120 /// (required for array sections): LHS op= RHS.
5121 /// \param Type Type of array.
5122 /// \param LHSVar Variable on the left side of the reduction operation
5123 /// (references element of array in original variable).
5124 /// \param RHSVar Variable on the right side of the reduction operation
5125 /// (references element of array in original variable).
5126 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5127 /// RHSVar.
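     /// A rough sketch of what this emits (illustrative pseudo-code only,
     /// assuming a flat array of NumElements elements of type ElementTy):
     /// \code
     /// lhs = &LHS[0]; rhs = &RHS[0]; end = lhs + NumElements;
     /// if (lhs != end) {
     ///   do {
     ///     RedOpGen(...); // with LHSVar/RHSVar privatized to *lhs / *rhs
     ///     ++lhs; ++rhs;
     ///   } while (lhs != end);
     /// }
     /// \endcode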
5128 static void EmitOMPAggregateReduction(
5129     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5130     const VarDecl *RHSVar,
5131     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5132                                   const Expr *, const Expr *)> &RedOpGen,
5133     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5134     const Expr *UpExpr = nullptr) {
5135   // Perform element-by-element initialization.
5136   QualType ElementTy;
5137   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5138   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5139 
5140   // Drill down to the base element type on both arrays.
5141   auto ArrayTy = Type->getAsArrayTypeUnsafe();
5142   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5143 
5144   auto RHSBegin = RHSAddr.getPointer();
5145   auto LHSBegin = LHSAddr.getPointer();
5146   // Cast from pointer to array type to pointer to single element.
5147   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5148   // The basic structure here is a while-do loop.
5149   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5150   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5151   auto IsEmpty =
5152       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5153   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5154 
5155   // Enter the loop body, making that address the current address.
5156   auto EntryBB = CGF.Builder.GetInsertBlock();
5157   CGF.EmitBlock(BodyBB);
5158 
5159   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5160 
5161   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5162       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5163   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5164   Address RHSElementCurrent =
5165       Address(RHSElementPHI,
5166               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5167 
5168   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5169       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5170   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5171   Address LHSElementCurrent =
5172       Address(LHSElementPHI,
5173               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5174 
5175   // Emit copy.
5176   CodeGenFunction::OMPPrivateScope Scope(CGF);
5177   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
5178   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
5179   Scope.Privatize();
5180   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5181   Scope.ForceCleanup();
5182 
5183   // Shift the address forward by one element.
5184   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5185       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5186   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5187       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5188   // Check whether we've reached the end.
5189   auto Done =
5190       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5191   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5192   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5193   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5194 
5195   // Done.
5196   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5197 }
5198 
5199 /// Emit reduction combiner. If the combiner is a simple expression, emit it
5200 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
5201 /// call to the UDR combiner function.
5202 static void emitReductionCombiner(CodeGenFunction &CGF,
5203                                   const Expr *ReductionOp) {
5204   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
5205     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5206       if (auto *DRE =
5207               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5208         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5209           std::pair<llvm::Function *, llvm::Function *> Reduction =
5210               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5211           RValue Func = RValue::get(Reduction.first);
5212           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5213           CGF.EmitIgnoredExpr(ReductionOp);
5214           return;
5215         }
5216   CGF.EmitIgnoredExpr(ReductionOp);
5217 }
5218 
5219 llvm::Value *CGOpenMPRuntime::emitReductionFunction(
5220     CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
5221     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5222     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5223   auto &C = CGM.getContext();
5224 
5225   // void reduction_func(void *LHSArg, void *RHSArg);
5226   FunctionArgList Args;
5227   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5228                            ImplicitParamDecl::Other);
5229   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5230                            ImplicitParamDecl::Other);
5231   Args.push_back(&LHSArg);
5232   Args.push_back(&RHSArg);
5233   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5234   auto *Fn = llvm::Function::Create(
5235       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
5236       ".omp.reduction.reduction_func", &CGM.getModule());
5237   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5238   Fn->setDoesNotRecurse();
5239   CodeGenFunction CGF(CGM);
5240   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5241 
5242   // Dst = (void*[n])(LHSArg);
5243   // Src = (void*[n])(RHSArg);
5244   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5245       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5246       ArgsType), CGF.getPointerAlign());
5247   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5248       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5249       ArgsType), CGF.getPointerAlign());
5250 
5251   //  ...
5252   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5253   //  ...
5254   CodeGenFunction::OMPPrivateScope Scope(CGF);
5255   auto IPriv = Privates.begin();
5256   unsigned Idx = 0;
5257   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5258     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5259     Scope.addPrivate(RHSVar, [&]() -> Address {
5260       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5261     });
5262     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5263     Scope.addPrivate(LHSVar, [&]() -> Address {
5264       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5265     });
5266     QualType PrivTy = (*IPriv)->getType();
5267     if (PrivTy->isVariablyModifiedType()) {
5268       // Get array size and emit VLA type.
5269       ++Idx;
5270       Address Elem =
5271           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
5272       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5273       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
5274       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5275       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5276           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5277       CGF.EmitVariablyModifiedType(PrivTy);
5278     }
5279   }
5280   Scope.Privatize();
5281   IPriv = Privates.begin();
5282   auto ILHS = LHSExprs.begin();
5283   auto IRHS = RHSExprs.begin();
5284   for (auto *E : ReductionOps) {
5285     if ((*IPriv)->getType()->isArrayType()) {
5286       // Emit reduction for array section.
5287       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5288       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5289       EmitOMPAggregateReduction(
5290           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5291           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5292             emitReductionCombiner(CGF, E);
5293           });
5294     } else
5295       // Emit reduction for array subscript or single variable.
5296       emitReductionCombiner(CGF, E);
5297     ++IPriv;
5298     ++ILHS;
5299     ++IRHS;
5300   }
5301   Scope.ForceCleanup();
5302   CGF.FinishFunction();
5303   return Fn;
5304 }
5305 
5306 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5307                                                   const Expr *ReductionOp,
5308                                                   const Expr *PrivateRef,
5309                                                   const DeclRefExpr *LHS,
5310                                                   const DeclRefExpr *RHS) {
5311   if (PrivateRef->getType()->isArrayType()) {
5312     // Emit reduction for array section.
5313     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5314     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5315     EmitOMPAggregateReduction(
5316         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5317         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5318           emitReductionCombiner(CGF, ReductionOp);
5319         });
5320   } else
5321     // Emit reduction for array subscript or single variable.
5322     emitReductionCombiner(CGF, ReductionOp);
5323 }
5324 
5325 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5326                                     ArrayRef<const Expr *> Privates,
5327                                     ArrayRef<const Expr *> LHSExprs,
5328                                     ArrayRef<const Expr *> RHSExprs,
5329                                     ArrayRef<const Expr *> ReductionOps,
5330                                     ReductionOptionsTy Options) {
5331   if (!CGF.HaveInsertPoint())
5332     return;
5333 
5334   bool WithNowait = Options.WithNowait;
5335   bool SimpleReduction = Options.SimpleReduction;
5336 
5337   // The following code should be emitted for the reduction:
5338   //
5339   // static kmp_critical_name lock = { 0 };
5340   //
5341   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5342   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5343   //  ...
5344   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5345   //  *(Type<n>-1*)rhs[<n>-1]);
5346   // }
5347   //
5348   // ...
5349   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5350   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5351   // RedList, reduce_func, &<lock>)) {
5352   // case 1:
5353   //  ...
5354   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5355   //  ...
5356   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5357   // break;
5358   // case 2:
5359   //  ...
5360   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5361   //  ...
5362   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5363   // break;
5364   // default:;
5365   // }
5366   //
5367   // If SimpleReduction is true, only the following code is generated:
5368   //  ...
5369   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5370   //  ...
5371 
5372   auto &C = CGM.getContext();
5373 
5374   if (SimpleReduction) {
5375     CodeGenFunction::RunCleanupsScope Scope(CGF);
5376     auto IPriv = Privates.begin();
5377     auto ILHS = LHSExprs.begin();
5378     auto IRHS = RHSExprs.begin();
5379     for (auto *E : ReductionOps) {
5380       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5381                                   cast<DeclRefExpr>(*IRHS));
5382       ++IPriv;
5383       ++ILHS;
5384       ++IRHS;
5385     }
5386     return;
5387   }
5388 
5389   // 1. Build a list of reduction variables.
5390   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5391   auto Size = RHSExprs.size();
5392   for (auto *E : Privates) {
5393     if (E->getType()->isVariablyModifiedType())
5394       // Reserve space for the array size.
5395       ++Size;
5396   }
5397   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5398   QualType ReductionArrayTy =
5399       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5400                              /*IndexTypeQuals=*/0);
5401   Address ReductionList =
5402       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5403   auto IPriv = Privates.begin();
5404   unsigned Idx = 0;
5405   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5406     Address Elem =
5407       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
5408     CGF.Builder.CreateStore(
5409         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5410             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5411         Elem);
5412     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5413       // Store array size.
5414       ++Idx;
5415       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
5416                                              CGF.getPointerSize());
5417       llvm::Value *Size = CGF.Builder.CreateIntCast(
5418           CGF.getVLASize(
5419                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5420               .NumElts,
5421           CGF.SizeTy, /*isSigned=*/false);
5422       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5423                               Elem);
5424     }
5425   }
5426 
5427   // 2. Emit reduce_func().
5428   auto *ReductionFn = emitReductionFunction(
5429       CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
5430       Privates, LHSExprs, RHSExprs, ReductionOps);
5431 
5432   // 3. Create static kmp_critical_name lock = { 0 };
5433   auto *Lock = getCriticalRegionLock(".reduction");
5434 
5435   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5436   // RedList, reduce_func, &<lock>);
5437   auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5438   auto *ThreadId = getThreadID(CGF, Loc);
5439   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5440   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5441       ReductionList.getPointer(), CGF.VoidPtrTy);
5442   llvm::Value *Args[] = {
5443       IdentTLoc,                             // ident_t *<loc>
5444       ThreadId,                              // i32 <gtid>
5445       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5446       ReductionArrayTySize,                  // size_type sizeof(RedList)
5447       RL,                                    // void *RedList
5448       ReductionFn, // void (*) (void *, void *) <reduce_func>
5449       Lock         // kmp_critical_name *&<lock>
5450   };
5451   auto Res = CGF.EmitRuntimeCall(
5452       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5453                                        : OMPRTL__kmpc_reduce),
5454       Args);
5455 
5456   // 5. Build switch(res)
5457   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5458   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5459 
5460   // 6. Build case 1:
5461   //  ...
5462   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5463   //  ...
5464   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5465   // break;
5466   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5467   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5468   CGF.EmitBlock(Case1BB);
5469 
5470   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5471   llvm::Value *EndArgs[] = {
5472       IdentTLoc, // ident_t *<loc>
5473       ThreadId,  // i32 <gtid>
5474       Lock       // kmp_critical_name *&<lock>
5475   };
5476   auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5477       CodeGenFunction &CGF, PrePostActionTy &Action) {
5478     auto &RT = CGF.CGM.getOpenMPRuntime();
5479     auto IPriv = Privates.begin();
5480     auto ILHS = LHSExprs.begin();
5481     auto IRHS = RHSExprs.begin();
5482     for (auto *E : ReductionOps) {
5483       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5484                                      cast<DeclRefExpr>(*IRHS));
5485       ++IPriv;
5486       ++ILHS;
5487       ++IRHS;
5488     }
5489   };
5490   RegionCodeGenTy RCG(CodeGen);
5491   CommonActionTy Action(
5492       nullptr, llvm::None,
5493       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5494                                        : OMPRTL__kmpc_end_reduce),
5495       EndArgs);
5496   RCG.setAction(Action);
5497   RCG(CGF);
5498 
5499   CGF.EmitBranch(DefaultBB);
5500 
5501   // 7. Build case 2:
5502   //  ...
5503   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5504   //  ...
5505   // break;
5506   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5507   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5508   CGF.EmitBlock(Case2BB);
5509 
5510   auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5511       CodeGenFunction &CGF, PrePostActionTy &Action) {
5512     auto ILHS = LHSExprs.begin();
5513     auto IRHS = RHSExprs.begin();
5514     auto IPriv = Privates.begin();
5515     for (auto *E : ReductionOps) {
5516       const Expr *XExpr = nullptr;
5517       const Expr *EExpr = nullptr;
5518       const Expr *UpExpr = nullptr;
5519       BinaryOperatorKind BO = BO_Comma;
5520       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
5521         if (BO->getOpcode() == BO_Assign) {
5522           XExpr = BO->getLHS();
5523           UpExpr = BO->getRHS();
5524         }
5525       }
5526       // Try to emit update expression as a simple atomic.
5527       auto *RHSExpr = UpExpr;
5528       if (RHSExpr) {
5529         // Analyze RHS part of the whole expression.
5530         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
5531                 RHSExpr->IgnoreParenImpCasts())) {
5532           // If this is a conditional operator, analyze its condition for
5533           // min/max reduction operator.
5534           RHSExpr = ACO->getCond();
5535         }
5536         if (auto *BORHS =
5537                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5538           EExpr = BORHS->getRHS();
5539           BO = BORHS->getOpcode();
5540         }
5541       }
5542       if (XExpr) {
5543         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5544         auto &&AtomicRedGen = [BO, VD,
5545                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5546                                     const Expr *EExpr, const Expr *UpExpr) {
5547           LValue X = CGF.EmitLValue(XExpr);
5548           RValue E;
5549           if (EExpr)
5550             E = CGF.EmitAnyExpr(EExpr);
5551           CGF.EmitOMPAtomicSimpleUpdateExpr(
5552               X, E, BO, /*IsXLHSInRHSPart=*/true,
5553               llvm::AtomicOrdering::Monotonic, Loc,
5554               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5555                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5556                 PrivateScope.addPrivate(
5557                     VD, [&CGF, VD, XRValue, Loc]() -> Address {
5558                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5559                       CGF.emitOMPSimpleStore(
5560                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5561                           VD->getType().getNonReferenceType(), Loc);
5562                       return LHSTemp;
5563                     });
5564                 (void)PrivateScope.Privatize();
5565                 return CGF.EmitAnyExpr(UpExpr);
5566               });
5567         };
5568         if ((*IPriv)->getType()->isArrayType()) {
5569           // Emit atomic reduction for array section.
5570           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5571           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5572                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5573         } else
5574           // Emit atomic reduction for array subscript or single variable.
5575           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5576       } else {
5577         // Emit as a critical region.
5578         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5579                                      const Expr *, const Expr *) {
5580           auto &RT = CGF.CGM.getOpenMPRuntime();
5581           RT.emitCriticalRegion(
5582               CGF, ".atomic_reduction",
5583               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5584                 Action.Enter(CGF);
5585                 emitReductionCombiner(CGF, E);
5586               },
5587               Loc);
5588         };
5589         if ((*IPriv)->getType()->isArrayType()) {
5590           auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5591           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5592           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5593                                     CritRedGen);
5594         } else
5595           CritRedGen(CGF, nullptr, nullptr, nullptr);
5596       }
5597       ++ILHS;
5598       ++IRHS;
5599       ++IPriv;
5600     }
5601   };
5602   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5603   if (!WithNowait) {
5604     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5605     llvm::Value *EndArgs[] = {
5606         IdentTLoc, // ident_t *<loc>
5607         ThreadId,  // i32 <gtid>
5608         Lock       // kmp_critical_name *&<lock>
5609     };
5610     CommonActionTy Action(nullptr, llvm::None,
5611                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5612                           EndArgs);
5613     AtomicRCG.setAction(Action);
5614     AtomicRCG(CGF);
5615   } else
5616     AtomicRCG(CGF);
5617 
5618   CGF.EmitBranch(DefaultBB);
5619   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5620 }
5621 
5622 /// Generates unique name for artificial threadprivate variables.
5623 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
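     /// For example (illustrative only; the numeric suffix is the raw source
     /// location encoding and varies): a local variable 'x' with prefix
     /// "reduction_size" could yield a name like "reduction_size.x_12345".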
5624 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5625                                       const Expr *Ref) {
5626   SmallString<256> Buffer;
5627   llvm::raw_svector_ostream Out(Buffer);
5628   const clang::DeclRefExpr *DE;
5629   const VarDecl *D = ::getBaseDecl(Ref, DE);
5630   if (!D)
5631     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5632   D = D->getCanonicalDecl();
5633   Out << Prefix << "."
5634       << (D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D))
5635       << "_" << D->getCanonicalDecl()->getLocStart().getRawEncoding();
5636   return Out.str();
5637 }
5638 
5639 /// Emits reduction initializer function:
5640 /// \code
5641 /// void @.red_init(void* %arg) {
5642 /// %0 = bitcast void* %arg to <type>*
5643 /// store <type> <init>, <type>* %0
5644 /// ret void
5645 /// }
5646 /// \endcode
5647 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5648                                            SourceLocation Loc,
5649                                            ReductionCodeGen &RCG, unsigned N) {
5650   auto &C = CGM.getContext();
5651   FunctionArgList Args;
5652   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5653                           ImplicitParamDecl::Other);
5654   Args.emplace_back(&Param);
5655   auto &FnInfo =
5656       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5657   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5658   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5659                                     ".red_init.", &CGM.getModule());
5660   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5661   Fn->setDoesNotRecurse();
5662   CodeGenFunction CGF(CGM);
5663   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5664   Address PrivateAddr = CGF.EmitLoadOfPointer(
5665       CGF.GetAddrOfLocalVar(&Param),
5666       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5667   llvm::Value *Size = nullptr;
5668   // If the size of the reduction item is non-constant, load it from global
5669   // threadprivate variable.
5670   if (RCG.getSizes(N).second) {
5671     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5672         CGF, CGM.getContext().getSizeType(),
5673         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5674     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5675                                 CGM.getContext().getSizeType(), Loc);
5676   }
5677   RCG.emitAggregateType(CGF, N, Size);
5678   LValue SharedLVal;
5679   // If the initializer uses the initializer from a declare reduction
5680   // construct, emit a pointer to the address of the original reduction item
5681   // (required by the reduction initializer).
5682   if (RCG.usesReductionInitializer(N)) {
5683     Address SharedAddr =
5684         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5685             CGF, CGM.getContext().VoidPtrTy,
5686             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
5687     SharedAddr = CGF.EmitLoadOfPointer(
5688         SharedAddr,
5689         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5690     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5691   } else {
5692     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
5693         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5694         CGM.getContext().VoidPtrTy);
5695   }
5696   // Emit the initializer:
5697   // %0 = bitcast void* %arg to <type>*
5698   // store <type> <init>, <type>* %0
5699   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
5700                          [](CodeGenFunction &) { return false; });
5701   CGF.FinishFunction();
5702   return Fn;
5703 }
5704 
5705 /// Emits reduction combiner function:
5706 /// \code
5707 /// void @.red_comb(void* %arg0, void* %arg1) {
5708 /// %lhs = bitcast void* %arg0 to <type>*
5709 /// %rhs = bitcast void* %arg1 to <type>*
5710 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5711 /// store <type> %2, <type>* %lhs
5712 /// ret void
5713 /// }
5714 /// \endcode
5715 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5716                                            SourceLocation Loc,
5717                                            ReductionCodeGen &RCG, unsigned N,
5718                                            const Expr *ReductionOp,
5719                                            const Expr *LHS, const Expr *RHS,
5720                                            const Expr *PrivateRef) {
5721   auto &C = CGM.getContext();
5722   auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5723   auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5724   FunctionArgList Args;
5725   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5726                                C.VoidPtrTy, ImplicitParamDecl::Other);
5727   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5728                             ImplicitParamDecl::Other);
5729   Args.emplace_back(&ParamInOut);
5730   Args.emplace_back(&ParamIn);
5731   auto &FnInfo =
5732       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5733   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5734   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5735                                     ".red_comb.", &CGM.getModule());
5736   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5737   Fn->setDoesNotRecurse();
5738   CodeGenFunction CGF(CGM);
5739   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5740   llvm::Value *Size = nullptr;
5741   // If the size of the reduction item is non-constant, load it from global
5742   // threadprivate variable.
5743   if (RCG.getSizes(N).second) {
5744     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5745         CGF, CGM.getContext().getSizeType(),
5746         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5747     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5748                                 CGM.getContext().getSizeType(), Loc);
5749   }
5750   RCG.emitAggregateType(CGF, N, Size);
5751   // Remap lhs and rhs variables to the addresses of the function arguments.
5752   // %lhs = bitcast void* %arg0 to <type>*
5753   // %rhs = bitcast void* %arg1 to <type>*
5754   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5755   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
5756     // Pull out the pointer to the variable.
5757     Address PtrAddr = CGF.EmitLoadOfPointer(
5758         CGF.GetAddrOfLocalVar(&ParamInOut),
5759         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5760     return CGF.Builder.CreateElementBitCast(
5761         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5762   });
5763   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
5764     // Pull out the pointer to the variable.
5765     Address PtrAddr = CGF.EmitLoadOfPointer(
5766         CGF.GetAddrOfLocalVar(&ParamIn),
5767         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5768     return CGF.Builder.CreateElementBitCast(
5769         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5770   });
5771   PrivateScope.Privatize();
5772   // Emit the combiner body:
5773   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5774   // store <type> %2, <type>* %lhs
5775   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5776       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5777       cast<DeclRefExpr>(RHS));
5778   CGF.FinishFunction();
5779   return Fn;
5780 }
5781 
5782 /// Emits reduction finalizer function:
5783 /// \code
5784 /// void @.red_fini(void* %arg) {
5785 /// %0 = bitcast void* %arg to <type>*
5786 /// <destroy>(<type>* %0)
5787 /// ret void
5788 /// }
5789 /// \endcode
5790 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5791                                            SourceLocation Loc,
5792                                            ReductionCodeGen &RCG, unsigned N) {
5793   if (!RCG.needCleanups(N))
5794     return nullptr;
5795   auto &C = CGM.getContext();
5796   FunctionArgList Args;
5797   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5798                           ImplicitParamDecl::Other);
5799   Args.emplace_back(&Param);
5800   auto &FnInfo =
5801       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5802   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5803   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5804                                     ".red_fini.", &CGM.getModule());
5805   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5806   Fn->setDoesNotRecurse();
5807   CodeGenFunction CGF(CGM);
5808   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5809   Address PrivateAddr = CGF.EmitLoadOfPointer(
5810       CGF.GetAddrOfLocalVar(&Param),
5811       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5812   llvm::Value *Size = nullptr;
5813   // If the size of the reduction item is non-constant, load it from global
5814   // threadprivate variable.
5815   if (RCG.getSizes(N).second) {
5816     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5817         CGF, CGM.getContext().getSizeType(),
5818         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5819     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5820                                 CGM.getContext().getSizeType(), Loc);
5821   }
5822   RCG.emitAggregateType(CGF, N, Size);
5823   // Emit the finalizer body:
5824   // <destroy>(<type>* %0)
5825   RCG.emitCleanups(CGF, N, PrivateAddr);
5826   CGF.FinishFunction();
5827   return Fn;
5828 }
5829 
5830 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5831     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5832     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5833   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5834     return nullptr;
5835 
5836   // Build typedef struct:
5837   // kmp_task_red_input {
5838   //   void *reduce_shar; // shared reduction item
5839   //   size_t reduce_size; // size of data item
5840   //   void *reduce_init; // data initialization routine
5841   //   void *reduce_fini; // data finalization routine
5842   //   void *reduce_comb; // data combiner routine
5843   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5844   // } kmp_task_red_input_t;
5845   ASTContext &C = CGM.getContext();
5846   auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
5847   RD->startDefinition();
5848   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5849   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5850   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5851   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5852   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5853   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5854       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5855   RD->completeDefinition();
5856   QualType RDType = C.getRecordType(RD);
5857   unsigned Size = Data.ReductionVars.size();
5858   llvm::APInt ArraySize(/*numBits=*/64, Size);
5859   QualType ArrayRDType = C.getConstantArrayType(
5860       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
5861   // kmp_task_red_input_t .rd_input.[Size];
5862   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5863   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
5864                        Data.ReductionOps);
5865   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5866     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5867     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5868                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5869     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5870         TaskRedInput.getPointer(), Idxs,
5871         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5872         ".rd_input.gep.");
5873     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5874     // ElemLVal.reduce_shar = &Shareds[Cnt];
5875     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5876     RCG.emitSharedLValue(CGF, Cnt);
5877     llvm::Value *CastedShared =
5878         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
5879     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5880     RCG.emitAggregateType(CGF, Cnt);
5881     llvm::Value *SizeValInChars;
5882     llvm::Value *SizeVal;
5883     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5884     // We use delayed creation/initialization for VLAs, array sections and
5885     // custom reduction initializations. It is required because the runtime
5886     // does not provide a way to pass the sizes of VLAs/array sections to the
5887     // initializer/combiner/finalizer functions and does not pass a pointer to
5888     // the original reduction item to the initializer. Instead, threadprivate
5889     // global variables store these values for use in those functions.
5890     bool DelayedCreation = !!SizeVal;
5891     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5892                                                /*isSigned=*/false);
5893     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5894     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5895     // ElemLVal.reduce_init = init;
5896     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5897     llvm::Value *InitAddr =
5898         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5899     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5900     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
5901     // ElemLVal.reduce_fini = fini;
5902     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5903     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5904     llvm::Value *FiniAddr = Fini
5905                                 ? CGF.EmitCastToVoidPtr(Fini)
5906                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5907     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5908     // ElemLVal.reduce_comb = comb;
5909     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5910     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5911         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5912         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5913     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5914     // ElemLVal.flags = 0;
5915     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5916     if (DelayedCreation) {
5917       CGF.EmitStoreOfScalar(
5918           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
5919           FlagsLVal);
5920     } else
5921       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5922   }
5923   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
5924   // *data);
5925   llvm::Value *Args[] = {
5926       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5927                                 /*isSigned=*/true),
5928       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5929       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5930                                                       CGM.VoidPtrTy)};
5931   return CGF.EmitRuntimeCall(
5932       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
5933 }
5934 
5935 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5936                                               SourceLocation Loc,
5937                                               ReductionCodeGen &RCG,
5938                                               unsigned N) {
5939   auto Sizes = RCG.getSizes(N);
5940   // Emit a threadprivate global variable if the size is non-constant
5941   // (Sizes.second != nullptr).
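  // Illustrative sketch (names are placeholders, not the exact mangling): for
  // a VLA reduction item this emits something like
  //   @"<unique>.reduction_size.<ref>" = threadprivate global of size_t
  //   store %<vla.size.in.chars>, @"<unique>.reduction_size.<ref>"
  // so that the generated init/comb/fini helpers, which cannot take the size
  // as a parameter, can read it back from the threadprivate global.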
5942   if (Sizes.second) {
5943     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5944                                                      /*isSigned=*/false);
5945     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5946         CGF, CGM.getContext().getSizeType(),
5947         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5948     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5949   }
5950   // Store the address of the original reduction item if a custom initializer is used.
5951   if (RCG.usesReductionInitializer(N)) {
5952     Address SharedAddr = getAddrOfArtificialThreadPrivate(
5953         CGF, CGM.getContext().VoidPtrTy,
5954         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
5955     CGF.Builder.CreateStore(
5956         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5957             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
5958         SharedAddr, /*IsVolatile=*/false);
5959   }
5960 }
5961 
5962 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5963                                               SourceLocation Loc,
5964                                               llvm::Value *ReductionsPtr,
5965                                               LValue SharedLVal) {
5966   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5967   // *d);
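  // Illustrative use (a sketch, not emitted verbatim here): inside a
  // participating task, a reference to the reduction item 'x' is replaced by
  //   *(T *)__kmpc_task_reduction_get_th_data(gtid, tg, &x)
  // so that each thread updates its own private copy of 'x'.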
5968   llvm::Value *Args[] = {
5969       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5970                                 /*isSigned=*/true),
5971       ReductionsPtr,
5972       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
5973                                                       CGM.VoidPtrTy)};
5974   return Address(
5975       CGF.EmitRuntimeCall(
5976           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
5977       SharedLVal.getAlignment());
5978 }
5979 
5980 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
5981                                        SourceLocation Loc) {
5982   if (!CGF.HaveInsertPoint())
5983     return;
5984   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5985   // global_tid);
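  // For example (illustrative): '#pragma omp taskwait' lowers to this single
  // call, __kmpc_omp_taskwait(&loc, gtid), at the point of the directive.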
5986   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
5987   // Ignore return result until untied tasks are supported.
5988   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
5989   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5990     Region->emitUntiedSwitch(CGF);
5991 }
5992 
5993 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5994                                            OpenMPDirectiveKind InnerKind,
5995                                            const RegionCodeGenTy &CodeGen,
5996                                            bool HasCancel) {
5997   if (!CGF.HaveInsertPoint())
5998     return;
5999   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6000   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6001 }
6002 
6003 namespace {
6004 enum RTCancelKind {
6005   CancelNoreq = 0,
6006   CancelParallel = 1,
6007   CancelLoop = 2,
6008   CancelSections = 3,
6009   CancelTaskgroup = 4
6010 };
6011 } // anonymous namespace
6012 
6013 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6014   RTCancelKind CancelKind = CancelNoreq;
6015   if (CancelRegion == OMPD_parallel)
6016     CancelKind = CancelParallel;
6017   else if (CancelRegion == OMPD_for)
6018     CancelKind = CancelLoop;
6019   else if (CancelRegion == OMPD_sections)
6020     CancelKind = CancelSections;
6021   else {
6022     assert(CancelRegion == OMPD_taskgroup);
6023     CancelKind = CancelTaskgroup;
6024   }
6025   return CancelKind;
6026 }
6027 
6028 void CGOpenMPRuntime::emitCancellationPointCall(
6029     CodeGenFunction &CGF, SourceLocation Loc,
6030     OpenMPDirectiveKind CancelRegion) {
6031   if (!CGF.HaveInsertPoint())
6032     return;
6033   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6034   // global_tid, kmp_int32 cncl_kind);
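  // For example (illustrative):
  //   #pragma omp for
  //   for (...) { ...; #pragma omp cancellation point for; ... }
  // emits the runtime call and the conditional exit branch built below at the
  // point of the 'cancellation point' directive.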
6035   if (auto *OMPRegionInfo =
6036           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6037     // For 'cancellation point taskgroup', the task region info may not have a
6038     // cancel. This may instead happen in another adjacent task.
6039     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6040       llvm::Value *Args[] = {
6041           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6042           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6043       // Ignore return result until untied tasks are supported.
6044       auto *Result = CGF.EmitRuntimeCall(
6045           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6046       // if (__kmpc_cancellationpoint()) {
6047       //   exit from construct;
6048       // }
6049       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
6050       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
6051       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
6052       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6053       CGF.EmitBlock(ExitBB);
6054       // exit from construct;
6055       auto CancelDest =
6056           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6057       CGF.EmitBranchThroughCleanup(CancelDest);
6058       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6059     }
6060   }
6061 }
6062 
6063 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6064                                      const Expr *IfCond,
6065                                      OpenMPDirectiveKind CancelRegion) {
6066   if (!CGF.HaveInsertPoint())
6067     return;
6068   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6069   // kmp_int32 cncl_kind);
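  // For example (illustrative):
  //   #pragma omp cancel parallel if (err != 0)
  // guards the __kmpc_cancel call with the 'if' condition (via emitOMPIfClause
  // below) and branches out of the construct when the runtime reports that a
  // cancellation was activated.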
6070   if (auto *OMPRegionInfo =
6071           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6072     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6073                                                         PrePostActionTy &) {
6074       auto &RT = CGF.CGM.getOpenMPRuntime();
6075       llvm::Value *Args[] = {
6076           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6077           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6078       // Ignore return result until untied tasks are supported.
6079       auto *Result = CGF.EmitRuntimeCall(
6080           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6081       // if (__kmpc_cancel()) {
6082       //   exit from construct;
6083       // }
6084       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
6085       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
6086       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
6087       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6088       CGF.EmitBlock(ExitBB);
6089       // exit from construct;
6090       auto CancelDest =
6091           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6092       CGF.EmitBranchThroughCleanup(CancelDest);
6093       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6094     };
6095     if (IfCond)
6096       emitOMPIfClause(CGF, IfCond, ThenGen,
6097                       [](CodeGenFunction &, PrePostActionTy &) {});
6098     else {
6099       RegionCodeGenTy ThenRCG(ThenGen);
6100       ThenRCG(CGF);
6101     }
6102   }
6103 }
6104 
6105 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6106     const OMPExecutableDirective &D, StringRef ParentName,
6107     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6108     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6109   assert(!ParentName.empty() && "Invalid target region parent name!");
6110 
6111   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6112                                    IsOffloadEntry, CodeGen);
6113 }
6114 
6115 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6116     const OMPExecutableDirective &D, StringRef ParentName,
6117     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6118     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6119   // Create a unique name for the entry function using the source location
6120   // information of the current target region. The name will be something like:
6121   //
6122   // __omp_offloading_DD_FFFF_PP_lBB
6123   //
6124   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6125   // mangled name of the function that encloses the target region and BB is the
6126   // line number of the target region.
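  //
  // For example (with made-up device and file IDs): a target region at line 17
  // inside 'void foo()' could be named __omp_offloading_42_beef__Z3foov_l17.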
6127 
6128   unsigned DeviceID;
6129   unsigned FileID;
6130   unsigned Line;
6131   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
6132                            Line);
6133   SmallString<64> EntryFnName;
6134   {
6135     llvm::raw_svector_ostream OS(EntryFnName);
6136     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6137        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6138   }
6139 
6140   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6141 
6142   CodeGenFunction CGF(CGM, true);
6143   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6144   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6145 
6146   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6147 
6148   // If this target outline function is not an offload entry, we don't need to
6149   // register it.
6150   if (!IsOffloadEntry)
6151     return;
6152 
6153   // The target region ID is used by the runtime library to identify the current
6154   // target region, so it only has to be unique and not necessarily point to
6155   // anything. It could be the pointer to the outlined function that implements
6156   // the target region, but we aren't using that so that the compiler doesn't
6157   // need to keep it alive and can therefore inline the host function if proven
6158   // worthwhile during optimization. On the other hand, if emitting code for the
6159   // device, the ID has to be the function address so that it can be retrieved
6160   // from the offloading entry and launched by the runtime library. We also mark
6161   // the outlined function with external linkage when emitting code for the
6162   // device, because these functions will be entry points into the device image.
6163 
6164   if (CGM.getLangOpts().OpenMPIsDevice) {
6165     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6166     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
6167     OutlinedFn->setDSOLocal(false);
6168   } else
6169     OutlinedFnID = new llvm::GlobalVariable(
6170         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6171         llvm::GlobalValue::PrivateLinkage,
6172         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
6173 
6174   // Register the information for the entry associated with this target region.
6175   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6176       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6177       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6178 }
6179 
6180 /// Discard any CompoundStmts intervening between two constructs.
6181 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
6182   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
6183     Body = CS->body_front();
6184 
6185   return Body;
6186 }
6187 
6188 /// Emit the number of teams for a target directive.  Inspect the num_teams
6189 /// clause associated with a teams construct combined or closely nested
6190 /// with the target directive.
6191 ///
6192 /// Emit a team of size one for directives such as 'target parallel' that
6193 /// have no associated teams construct.
6194 ///
6195 /// Otherwise, return nullptr.
6196 static llvm::Value *
6197 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
6198                                CodeGenFunction &CGF,
6199                                const OMPExecutableDirective &D) {
6200 
6201   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
6202                                               "teams directive expected to be "
6203                                               "emitted only for the host!");
6204 
6205   auto &Bld = CGF.Builder;
6206 
6207   // If the target directive is combined with a teams directive:
6208   //   Return the value in the num_teams clause, if any.
6209   //   Otherwise, return 0 to denote the runtime default.
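  // For example (illustrative): '#pragma omp target teams num_teams(4)' yields
  // the value 4 here, while a bare '#pragma omp target teams' yields 0.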
6210   if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
6211     if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
6212       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6213       auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
6214                                          /*IgnoreResultAssign*/ true);
6215       return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
6216                                /*IsSigned=*/true);
6217     }
6218 
6219     // The default value is 0.
6220     return Bld.getInt32(0);
6221   }
6222 
6223   // If the target directive is combined with a parallel directive but not a
6224   // teams directive, start one team.
6225   if (isOpenMPParallelDirective(D.getDirectiveKind()))
6226     return Bld.getInt32(1);
6227 
6228   // If the current target region has a teams region enclosed, we need to get
6229   // the number of teams to pass to the runtime function call. This is done
6230   // by generating the expression in an inlined region. This is required
6231   // because the expression is captured in the enclosing target environment
6232   // when the teams directive is not combined with target.
6233 
6234   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6235 
6236   if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
6237           ignoreCompoundStmts(CS.getCapturedStmt()))) {
6238     if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
6239       if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
6240         CGOpenMPInnerExprInfo CGInfo(CGF, CS);
6241         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6242         llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
6243         return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
6244                                  /*IsSigned=*/true);
6245       }
6246 
6247       // If we have an enclosed teams directive but no num_teams clause we use
6248       // the default value 0.
6249       return Bld.getInt32(0);
6250     }
6251   }
6252 
6253   // No teams associated with the directive.
6254   return nullptr;
6255 }
6256 
6257 /// Emit the number of threads for a target directive.  Inspect the
6258 /// thread_limit clause associated with a teams construct combined or closely
6259 /// nested with the target directive.
6260 ///
6261 /// Emit the num_threads clause for directives such as 'target parallel' that
6262 /// have no associated teams construct.
6263 ///
6264 /// Otherwise, return nullptr.
6265 static llvm::Value *
6266 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
6267                                  CodeGenFunction &CGF,
6268                                  const OMPExecutableDirective &D) {
6269 
6270   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
6271                                               "teams directive expected to be "
6272                                               "emitted only for the host!");
6273 
6274   auto &Bld = CGF.Builder;
6275 
6276   //
6277   // If the target directive is combined with a teams directive:
6278   //   Return the value in the thread_limit clause, if any.
6279   //
6280   // If the target directive is combined with a parallel directive:
6281   //   Return the value in the num_threads clause, if any.
6282   //
6283   // If both clauses are set, select the minimum of the two.
6284   //
6285   // If neither the teams nor the parallel combined directive sets the number
6286   // of threads in a team, return 0 to denote the runtime default.
6287   //
6288   // If this is not a teams directive return nullptr.
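  //
  // For example (illustrative): '#pragma omp target teams distribute parallel for
  // thread_limit(8) num_threads(4)' selects min(4, 8) == 4 below.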
6289 
6290   if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
6291       isOpenMPParallelDirective(D.getDirectiveKind())) {
6292     llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
6293     llvm::Value *NumThreadsVal = nullptr;
6294     llvm::Value *ThreadLimitVal = nullptr;
6295 
6296     if (const auto *ThreadLimitClause =
6297             D.getSingleClause<OMPThreadLimitClause>()) {
6298       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6299       auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
6300                                             /*IgnoreResultAssign*/ true);
6301       ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6302                                          /*IsSigned=*/true);
6303     }
6304 
6305     if (const auto *NumThreadsClause =
6306             D.getSingleClause<OMPNumThreadsClause>()) {
6307       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6308       llvm::Value *NumThreads =
6309           CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
6310                              /*IgnoreResultAssign*/ true);
6311       NumThreadsVal =
6312           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
6313     }
6314 
6315     // Select the lesser of thread_limit and num_threads.
6316     if (NumThreadsVal)
6317       ThreadLimitVal = ThreadLimitVal
6318                            ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
6319                                                                 ThreadLimitVal),
6320                                               NumThreadsVal, ThreadLimitVal)
6321                            : NumThreadsVal;
6322 
6323     // Set default value passed to the runtime if either teams or a target
6324     // parallel type directive is found but no clause is specified.
6325     if (!ThreadLimitVal)
6326       ThreadLimitVal = DefaultThreadLimitVal;
6327 
6328     return ThreadLimitVal;
6329   }
6330 
6331   // If the current target region has a teams region enclosed, we need to get
6332   // the thread limit to pass to the runtime function call. This is done
6333   // by generating the expression in an inlined region. This is required
6334   // because the expression is captured in the enclosing target environment
6335   // when the teams directive is not combined with target.
6336 
6337   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6338 
6339   if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
6340           ignoreCompoundStmts(CS.getCapturedStmt()))) {
6341     if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
6342       if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
6343         CGOpenMPInnerExprInfo CGInfo(CGF, CS);
6344         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6345         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
6346         return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
6347                                          /*IsSigned=*/true);
6348       }
6349 
6350       // If we have an enclosed teams directive but no thread_limit clause we
6351       // use the default value 0.
6352       return CGF.Builder.getInt32(0);
6353     }
6354   }
6355 
6356   // No teams associated with the directive.
6357   return nullptr;
6358 }
6359 
6360 namespace {
6361 // \brief Utility to handle information from clauses associated with a given
6362 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6363 // It provides a convenient interface to obtain the information and generate
6364 // code for that information.
6365 class MappableExprsHandler {
6366 public:
6367   /// \brief Values for bit flags used to specify the mapping type for
6368   /// offloading.
6369   enum OpenMPOffloadMappingFlags {
6370     /// \brief Allocate memory on the device and move data from host to device.
6371     OMP_MAP_TO = 0x01,
6372     /// \brief Allocate memory on the device and move data from device to host.
6373     OMP_MAP_FROM = 0x02,
6374     /// \brief Always perform the requested mapping action on the element, even
6375     /// if it was already mapped before.
6376     OMP_MAP_ALWAYS = 0x04,
6377     /// \brief Delete the element from the device environment, ignoring the
6378     /// current reference count associated with the element.
6379     OMP_MAP_DELETE = 0x08,
6380     /// \brief The element being mapped is a pointer-pointee pair; both the
6381     /// pointer and the pointee should be mapped.
6382     OMP_MAP_PTR_AND_OBJ = 0x10,
6383     /// \brief This flag signals that the base address of an entry should be
6384     /// passed to the target kernel as an argument.
6385     OMP_MAP_TARGET_PARAM = 0x20,
6386     /// \brief Signal that the runtime library has to return the device pointer
6387     /// in the current position for the data being mapped. Used when we have the
6388     /// use_device_ptr clause.
6389     OMP_MAP_RETURN_PARAM = 0x40,
6390     /// \brief This flag signals that the reference being passed is a pointer to
6391     /// private data.
6392     OMP_MAP_PRIVATE = 0x80,
6393     /// \brief Pass the element to the device by value.
6394     OMP_MAP_LITERAL = 0x100,
6395     /// Implicit map
6396     OMP_MAP_IMPLICIT = 0x200,
6397   };
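  // For example (illustrative): a 'map(from: x)' entry that is also passed to
  // the kernel is described as OMP_MAP_FROM | OMP_MAP_TARGET_PARAM, while the
  // pointee in a pointer-pointee pair additionally carries OMP_MAP_PTR_AND_OBJ.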
6398 
6399   /// Class that associates information with a base pointer to be passed to the
6400   /// runtime library.
6401   class BasePointerInfo {
6402     /// The base pointer.
6403     llvm::Value *Ptr = nullptr;
6404     /// The base declaration that refers to this device pointer, or null if
6405     /// there is none.
6406     const ValueDecl *DevPtrDecl = nullptr;
6407 
6408   public:
6409     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6410         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6411     llvm::Value *operator*() const { return Ptr; }
6412     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6413     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6414   };
6415 
6416   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
6417   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
6418   typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;
6419 
6420 private:
6421   /// \brief Directive from where the map clauses were extracted.
6422   const OMPExecutableDirective &CurDir;
6423 
6424   /// \brief Function the directive is being generated for.
6425   CodeGenFunction &CGF;
6426 
6427   /// \brief Set of all first private variables in the current directive.
6428   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
6429   /// Set of all reduction variables in the current directive.
6430   llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;
6431 
6432   /// Map between device pointer declarations and their expression components.
6433   /// The key value for declarations in 'this' is null.
6434   llvm::DenseMap<
6435       const ValueDecl *,
6436       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6437       DevPointersMap;
6438 
6439   llvm::Value *getExprTypeSize(const Expr *E) const {
6440     auto ExprTy = E->getType().getCanonicalType();
6441 
6442     // Reference types are ignored for mapping purposes.
6443     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
6444       ExprTy = RefTy->getPointeeType().getCanonicalType();
6445 
6446     // Given that an array section is considered a built-in type, we need to
6447     // do the calculation based on the length of the section instead of relying
6448     // on CGF.getTypeSize(E->getType()).
6449     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6450       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6451                             OAE->getBase()->IgnoreParenImpCasts())
6452                             .getCanonicalType();
6453 
6454       // If there is no length associated with the expression, that means we
6455       // are using the whole length of the base.
6456       if (!OAE->getLength() && OAE->getColonLoc().isValid())
6457         return CGF.getTypeSize(BaseTy);
6458 
6459       llvm::Value *ElemSize;
6460       if (auto *PTy = BaseTy->getAs<PointerType>())
6461         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6462       else {
6463         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6464         assert(ATy && "Expecting array type if not a pointer type.");
6465         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6466       }
6467 
6468       // If we don't have a length at this point, that is because we have an
6469       // array section with a single element.
6470       if (!OAE->getLength())
6471         return ElemSize;
6472 
6473       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6474       LengthVal =
6475           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6476       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6477     }
6478     return CGF.getTypeSize(ExprTy);
6479   }
6480 
6481   /// \brief Return the corresponding bits for a given map clause modifier. Add
6482   /// a flag marking the map as a pointer if requested. Add a flag marking the
6483   /// map as the first one of a series of maps that relate to the same map
6484   /// expression.
6485   uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
6486                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
6487                           bool AddIsTargetParamFlag) const {
6488     uint64_t Bits = 0u;
6489     switch (MapType) {
6490     case OMPC_MAP_alloc:
6491     case OMPC_MAP_release:
6492       // 'alloc' and 'release' are the default behavior in the runtime library,
6493       // i.e. if we don't pass any bits, alloc/release is what the runtime is
6494       // going to do. Therefore, we don't need to signal anything for these two
6495       // map types.
6496       break;
6497     case OMPC_MAP_to:
6498       Bits = OMP_MAP_TO;
6499       break;
6500     case OMPC_MAP_from:
6501       Bits = OMP_MAP_FROM;
6502       break;
6503     case OMPC_MAP_tofrom:
6504       Bits = OMP_MAP_TO | OMP_MAP_FROM;
6505       break;
6506     case OMPC_MAP_delete:
6507       Bits = OMP_MAP_DELETE;
6508       break;
6509     default:
6510       llvm_unreachable("Unexpected map type!");
6511       break;
6512     }
6513     if (AddPtrFlag)
6514       Bits |= OMP_MAP_PTR_AND_OBJ;
6515     if (AddIsTargetParamFlag)
6516       Bits |= OMP_MAP_TARGET_PARAM;
6517     if (MapTypeModifier == OMPC_MAP_always)
6518       Bits |= OMP_MAP_ALWAYS;
6519     return Bits;
6520   }
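  // For example (illustrative):
  //   getMapTypeBits(OMPC_MAP_tofrom, OMPC_MAP_always,
  //                  /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/true)
  // yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_TARGET_PARAM.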
6521 
6522   /// \brief Return true if the provided expression is a final array section. A
6523   /// final array section is one whose length can't be proved to be one.
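  /// For example (illustrative): 'a[3:1]' is not a final section (its constant
  /// length is 1), while 'a[3:n]' and 'p[0:22]' are, since their lengths cannot
  /// be proved to be one.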
6524   bool isFinalArraySectionExpression(const Expr *E) const {
6525     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6526 
6527     // It is not an array section and therefore not a unity-size one.
6528     if (!OASE)
6529       return false;
6530 
6531     // An array section with no colon always refers to a single element.
6532     if (OASE->getColonLoc().isInvalid())
6533       return false;
6534 
6535     auto *Length = OASE->getLength();
6536 
6537     // If we don't have a length, we have to check if the array has size 1
6538     // for this dimension. Also, we should always expect a length if the
6539     // base type is a pointer.
6540     if (!Length) {
6541       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6542                          OASE->getBase()->IgnoreParenImpCasts())
6543                          .getCanonicalType();
6544       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6545         return ATy->getSize().getSExtValue() != 1;
6546       // If we don't have a constant dimension length, we have to consider
6547       // the current section as having any size, so it is not necessarily
6548       // unitary. If it happens to be unity size, that's the user's fault.
6549       return true;
6550     }
6551 
6552     // Check if the length evaluates to 1.
6553     llvm::APSInt ConstLength;
6554     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
6555       return true; // Can have a size greater than 1.
6556 
6557     return ConstLength.getSExtValue() != 1;
6558   }
6559 
6560   /// \brief Return the adjusted map modifiers if the declaration a capture
6561   /// refers to appears in a first-private clause. This is expected to be used
6562   /// only with directives that start with 'target'.
6563   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
6564                                                unsigned CurrentModifiers) {
6565     assert(Cap.capturesVariable() && "Expected capture by reference only!");
6566 
6567     // A firstprivate variable captured by reference will use only the
6568     // 'private ptr' and 'map to' flags. Return the right flags if the captured
6569     // declaration is known as firstprivate in this handler.
6570     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
6571       return MappableExprsHandler::OMP_MAP_PRIVATE |
6572              MappableExprsHandler::OMP_MAP_TO;
6573     // A reduction variable will use only the 'map tofrom' flags
6574     // (OMP_MAP_TO | OMP_MAP_FROM).
6575     if (ReductionDecls.count(Cap.getCapturedVar())) {
6576       return MappableExprsHandler::OMP_MAP_TO |
6577              MappableExprsHandler::OMP_MAP_FROM;
6578     }
6579 
6580     // We didn't modify anything.
6581     return CurrentModifiers;
6582   }
6583 
6584 public:
6585   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
6586       : CurDir(Dir), CGF(CGF) {
6587     // Extract firstprivate clause information.
6588     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
6589       for (const auto *D : C->varlists())
6590         FirstPrivateDecls.insert(
6591             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6592     for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
6593       for (const auto *D : C->varlists()) {
6594         ReductionDecls.insert(
6595             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6596       }
6597     }
6598     // Extract device pointer clause information.
6599     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
6600       for (auto L : C->component_lists())
6601         DevPointersMap[L.first].push_back(L.second);
6602   }
6603 
6604   /// \brief Generate the base pointers, section pointers, sizes and map type
6605   /// bits for the provided map type, map modifier, and expression components.
6606   /// \a IsFirstComponent should be set to true if the provided set of
6607   /// components is the first associated with a capture.
6608   void generateInfoForComponentList(
6609       OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6610       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6611       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
6612       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
6613       bool IsFirstComponentList, bool IsImplicit) const {
6614 
6615     // The following summarizes what has to be generated for each map and the
6616     // types below. The generated information is expressed in this order:
6617     // base pointer, section pointer, size, flags
6618     // (to add to the ones that come from the map type and modifier).
6619     //
6620     // double d;
6621     // int i[100];
6622     // float *p;
6623     //
6624     // struct S1 {
6625     //   int i;
6626     //   float f[50];
6627     // }
6628     // struct S2 {
6629     //   int i;
6630     //   float f[50];
6631     //   S1 s;
6632     //   double *p;
6633     //   struct S2 *ps;
6634     // }
6635     // S2 s;
6636     // S2 *ps;
6637     //
6638     // map(d)
6639     // &d, &d, sizeof(double), noflags
6640     //
6641     // map(i)
6642     // &i, &i, 100*sizeof(int), noflags
6643     //
6644     // map(i[1:23])
6645     // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
6646     //
6647     // map(p)
6648     // &p, &p, sizeof(float*), noflags
6649     //
6650     // map(p[1:24])
6651     // p, &p[1], 24*sizeof(float), noflags
6652     //
6653     // map(s)
6654     // &s, &s, sizeof(S2), noflags
6655     //
6656     // map(s.i)
6657     // &s, &(s.i), sizeof(int), noflags
6658     //
6659     // map(s.s.f)
6660     // &s, &(s.s.f), 50*sizeof(float), noflags
6661     //
6662     // map(s.p)
6663     // &s, &(s.p), sizeof(double*), noflags
6664     //
6665     // map(s.p[:22], s.a, s.b)
6666     // &s, &(s.p), sizeof(double*), noflags
6667     // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
6668     //
6669     // map(s.ps)
6670     // &s, &(s.ps), sizeof(S2*), noflags
6671     //
6672     // map(s.ps->s.i)
6673     // &s, &(s.ps), sizeof(S2*), noflags
6674     // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
6675     //
6676     // map(s.ps->ps)
6677     // &s, &(s.ps), sizeof(S2*), noflags
6678     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6679     //
6680     // map(s.ps->ps->ps)
6681     // &s, &(s.ps), sizeof(S2*), noflags
6682     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6683     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
6684     //
6685     // map(s.ps->ps->s.f[:22])
6686     // &s, &(s.ps), sizeof(S2*), noflags
6687     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
6688     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
6689     //
6690     // map(ps)
6691     // &ps, &ps, sizeof(S2*), noflags
6692     //
6693     // map(ps->i)
6694     // ps, &(ps->i), sizeof(int), noflags
6695     //
6696     // map(ps->s.f)
6697     // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
6698     //
6699     // map(ps->p)
6700     // ps, &(ps->p), sizeof(double*), noflags
6701     //
6702     // map(ps->p[:22])
6703     // ps, &(ps->p), sizeof(double*), noflags
6704     // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
6705     //
6706     // map(ps->ps)
6707     // ps, &(ps->ps), sizeof(S2*), noflags
6708     //
6709     // map(ps->ps->s.i)
6710     // ps, &(ps->ps), sizeof(S2*), noflags
6711     // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
6712     //
6713     // map(ps->ps->ps)
6714     // ps, &(ps->ps), sizeof(S2*), noflags
6715     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6716     //
6717     // map(ps->ps->ps->ps)
6718     // ps, &(ps->ps), sizeof(S2*), noflags
6719     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6720     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
6721     //
6722     // map(ps->ps->ps->s.f[:22])
6723     // ps, &(ps->ps), sizeof(S2*), noflags
6724     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
6725     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
6726 
6727     // Track if the map information being generated is the first for a capture.
6728     bool IsCaptureFirstInfo = IsFirstComponentList;
6729     bool IsLink = false; // Is this variable a "declare target link"?
6730 
6731     // Scan the components from the base to the complete expression.
6732     auto CI = Components.rbegin();
6733     auto CE = Components.rend();
6734     auto I = CI;
6735 
6736     // Track if the map information being generated is the first for a list of
6737     // components.
6738     bool IsExpressionFirstInfo = true;
6739     llvm::Value *BP = nullptr;
6740 
6741     if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
6742       // The base is the 'this' pointer. The content of the pointer is going
6743       // to be the base of the field being mapped.
6744       BP = CGF.EmitScalarExpr(ME->getBase());
6745     } else {
6746       // The base is the reference to the variable.
6747       // BP = &Var.
6748       BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
6749       if (const auto *VD =
6750               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
6751         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
6752             isDeclareTargetDeclaration(VD)) {
6753           assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
6754                  "Declare target link is expected.");
6755           // Avoid warning in release build.
6756           (void)*Res;
6757           IsLink = true;
6758           BP = CGF.CGM.getOpenMPRuntime()
6759                    .getAddrOfDeclareTargetLink(VD)
6760                    .getPointer();
6761         }
6762       }
6763 
6764       // If the variable is a pointer and is being dereferenced (i.e. is not
6765       // the last component), the base has to be the pointer itself, not its
6766       // reference. References are ignored for mapping purposes.
6767       QualType Ty =
6768           I->getAssociatedDeclaration()->getType().getNonReferenceType();
6769       if (Ty->isAnyPointerType() && std::next(I) != CE) {
6770         auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
6771         BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
6772                                          Ty->castAs<PointerType>())
6773                  .getPointer();
6774 
6775         // We do not need to generate individual map information for the
6776         // pointer; it can be associated with the combined storage.
6777         ++I;
6778       }
6779     }
6780 
6781     uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
6782     for (; I != CE; ++I) {
6783       auto Next = std::next(I);
6784 
6785       // We need to generate the addresses and sizes if this is the last
6786       // component, if the component is a pointer or if it is an array section
6787       // whose length can't be proved to be one. If this is a pointer, it
6788       // becomes the base address for the following components.
6789 
6790       // A final array section is one whose length can't be proved to be one.
6791       bool IsFinalArraySection =
6792           isFinalArraySectionExpression(I->getAssociatedExpression());
6793 
6794       // Get information on whether the element is a pointer. Have to do a
6795       // special treatment for array sections given that they are built-in
6796       // types.
6797       const auto *OASE =
6798           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
6799       bool IsPointer =
6800           (OASE &&
6801            OMPArraySectionExpr::getBaseOriginalType(OASE)
6802                .getCanonicalType()
6803                ->isAnyPointerType()) ||
6804           I->getAssociatedExpression()->getType()->isAnyPointerType();
6805 
6806       if (Next == CE || IsPointer || IsFinalArraySection) {
6807 
6808         // If this is not the last component, we expect the pointer to be
6809         // associated with an array expression or member expression.
6810         assert((Next == CE ||
6811                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
6812                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
6813                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
6814                "Unexpected expression");
6815 
6816         llvm::Value *LB =
6817             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
6818         auto *Size = getExprTypeSize(I->getAssociatedExpression());
6819 
6820         // If we have a member expression and the current component is a
6821         // reference, we have to map the reference too. Whenever we have a
6822         // reference, the section that the reference refers to is going to be
6823         // a load instruction from the storage assigned to the reference.
6824         if (isa<MemberExpr>(I->getAssociatedExpression()) &&
6825             I->getAssociatedDeclaration()->getType()->isReferenceType()) {
6826           auto *LI = cast<llvm::LoadInst>(LB);
6827           auto *RefAddr = LI->getPointerOperand();
6828 
6829           BasePointers.push_back(BP);
6830           Pointers.push_back(RefAddr);
6831           Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
6832           Types.push_back(DefaultFlags |
6833                           getMapTypeBits(
6834                               /*MapType*/ OMPC_MAP_alloc,
6835                               /*MapTypeModifier=*/OMPC_MAP_unknown,
6836                               !IsExpressionFirstInfo, IsCaptureFirstInfo));
6837           IsExpressionFirstInfo = false;
6838           IsCaptureFirstInfo = false;
6839           // The reference will be the next base address.
6840           BP = RefAddr;
6841         }
6842 
6843         BasePointers.push_back(BP);
6844         Pointers.push_back(LB);
6845         Sizes.push_back(Size);
6846 
6847         // We need to add a pointer flag for each map that comes from the
6848         // same expression except for the first one. We also need to signal
6849         // whether this map is the first one that relates to the current
6850         // capture (there is a set of entries for each capture).
6851         Types.push_back(DefaultFlags |
6852                         getMapTypeBits(MapType, MapTypeModifier,
6853                                        !IsExpressionFirstInfo || IsLink,
6854                                        IsCaptureFirstInfo && !IsLink));
6855 
6856         // If we have a final array section, we are done with this expression.
6857         if (IsFinalArraySection)
6858           break;
6859 
6860         // The pointer becomes the base for the next element.
6861         if (Next != CE)
6862           BP = LB;
6863 
6864         IsExpressionFirstInfo = false;
6865         IsCaptureFirstInfo = false;
6866       }
6867     }
6868   }
6869 
6870   /// \brief Generate all the base pointers, section pointers, sizes and map
6871   /// types for the extracted mappable expressions. Also, for each item that
6872   /// relates with a device pointer, a pair of the relevant declaration and
6873   /// index where it occurs is appended to the device pointers info array.
6874   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
6875                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
6876                        MapFlagsArrayTy &Types) const {
6877     BasePointers.clear();
6878     Pointers.clear();
6879     Sizes.clear();
6880     Types.clear();
6881 
6882     struct MapInfo {
6883       /// Kind that defines how a device pointer has to be returned.
6884       enum ReturnPointerKind {
6885         // Don't have to return any pointer.
6886         RPK_None,
6887         // Pointer is the base of the declaration.
6888         RPK_Base,
6889         // Pointer is a member of the base declaration - 'this'
6890         RPK_Member,
6891         // Pointer is a reference and a member of the base declaration - 'this'
6892         RPK_MemberReference,
6893       };
6894       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6895       OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6896       OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
6897       ReturnPointerKind ReturnDevicePointer = RPK_None;
6898       bool IsImplicit = false;
6899 
6900       MapInfo() = default;
6901       MapInfo(
6902           OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6903           OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6904           ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
6905           : Components(Components), MapType(MapType),
6906             MapTypeModifier(MapTypeModifier),
6907             ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
6908     };
6909 
6910     // We have to process the component lists that relate with the same
6911     // declaration in a single chunk so that we can generate the map flags
6912     // correctly. Therefore, we organize all lists in a map.
6913     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
6914 
6915     // Helper function to fill the information map for the different supported
6916     // clauses.
6917     auto &&InfoGen = [&Info](
6918         const ValueDecl *D,
6919         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
6920         OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
6921         MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
6922       const ValueDecl *VD =
6923           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
6924       Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
6925                             IsImplicit);
6926     };
6927 
6928     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6929     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
6930       for (auto L : C->component_lists()) {
6931         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
6932                 MapInfo::RPK_None, C->isImplicit());
6933       }
6934     for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
6935       for (auto L : C->component_lists()) {
6936         InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
6937                 MapInfo::RPK_None, C->isImplicit());
6938       }
6939     for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
6940       for (auto L : C->component_lists()) {
6941         InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
6942                 MapInfo::RPK_None, C->isImplicit());
6943       }
6944 
6945     // Look at the use_device_ptr clause information and mark the existing map
6946     // entries as such. If there is no map information for an entry in the
6947     // use_device_ptr list, we create one with map type 'alloc' and a zero size
6948     // section. It is the user's fault if that data was not mapped before.
6949     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6950     for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
6951       for (auto L : C->component_lists()) {
6952         assert(!L.second.empty() && "Not expecting empty list of components!");
6953         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
6954         VD = cast<ValueDecl>(VD->getCanonicalDecl());
6955         auto *IE = L.second.back().getAssociatedExpression();
6956         // If the first component is a member expression, we have to look into
6957         // 'this', which maps to null in the map of map information. Otherwise
6958         // look directly for the information.
6959         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
6960 
6961         // We potentially have map information for this declaration already.
6962         // Look for the first set of components that refer to it.
6963         if (It != Info.end()) {
6964           auto CI = std::find_if(
6965               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
6966                 return MI.Components.back().getAssociatedDeclaration() == VD;
6967               });
6968           // If we found a map entry, signal that the pointer has to be returned
6969           // and move on to the next declaration.
6970           if (CI != It->second.end()) {
6971             CI->ReturnDevicePointer = isa<MemberExpr>(IE)
6972                                           ? (VD->getType()->isReferenceType()
6973                                                  ? MapInfo::RPK_MemberReference
6974                                                  : MapInfo::RPK_Member)
6975                                           : MapInfo::RPK_Base;
6976             continue;
6977           }
6978         }
6979 
6980         // We didn't find any match in our map information - generate a zero
6981         // size array section.
6982         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
6983         llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(IE),
6984                                                       IE->getExprLoc());
6985         BasePointers.push_back({Ptr, VD});
6986         Pointers.push_back(Ptr);
6987         Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
6988         Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
6989       }
6990 
6991     for (auto &M : Info) {
6992       // We need to know when we generate information for the first component
6993       // associated with a capture, because the mapping flags depend on it.
6994       bool IsFirstComponentList = true;
6995       for (MapInfo &L : M.second) {
6996         assert(!L.Components.empty() &&
6997                "Not expecting declaration with no component lists.");
6998 
6999         // Remember the current base pointer index.
7000         unsigned CurrentBasePointersIdx = BasePointers.size();
7001         // FIXME: MSVC 2013 seems to require this-> to find the member method.
7002         this->generateInfoForComponentList(
7003             L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
7004             Sizes, Types, IsFirstComponentList, L.IsImplicit);
7005 
7006         // If this entry relates with a device pointer, set the relevant
7007         // declaration and add the 'return pointer' flag.
7008         if (IsFirstComponentList &&
7009             L.ReturnDevicePointer != MapInfo::RPK_None) {
7010           // If the pointer is not the base of the map, we need to skip the
7011           // base. If it is a reference in a member field, we also need to skip
7012           // the map of the reference.
7013           if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
7014             ++CurrentBasePointersIdx;
7015             if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
7016               ++CurrentBasePointersIdx;
7017           }
7018           assert(BasePointers.size() > CurrentBasePointersIdx &&
7019                  "Unexpected number of mapped base pointers.");
7020 
7021           auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
7022           assert(RelevantVD &&
7023                  "No relevant declaration related with device pointer??");
7024 
7025           BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
7026           Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
7027         }
7028         IsFirstComponentList = false;
7029       }
7030     }
7031   }
7032 
7033   /// \brief Generate the base pointers, section pointers, sizes and map types
7034   /// associated to a given capture.
7035   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
7036                               llvm::Value *Arg,
7037                               MapBaseValuesArrayTy &BasePointers,
7038                               MapValuesArrayTy &Pointers,
7039                               MapValuesArrayTy &Sizes,
7040                               MapFlagsArrayTy &Types) const {
7041     assert(!Cap->capturesVariableArrayType() &&
7042            "Not expecting to generate map info for a variable array type!");
7043 
7044     BasePointers.clear();
7045     Pointers.clear();
7046     Sizes.clear();
7047     Types.clear();
7048 
7049     // We need to know when we are generating information for the first
7050     // component associated with a capture, because the mapping flags depend on it.
7051     bool IsFirstComponentList = true;
7052 
7053     const ValueDecl *VD =
7054         Cap->capturesThis()
7055             ? nullptr
7056             : Cap->getCapturedVar()->getCanonicalDecl();
7057 
7058     // If this declaration appears in an is_device_ptr clause, we just have to
7059     // pass the pointer by value. If it is a reference to a declaration, we
7060     // just pass its value; otherwise, if it is a member expression, we need
7061     // to map 'to' the field.
7062     if (!VD) {
7063       auto It = DevPointersMap.find(VD);
7064       if (It != DevPointersMap.end()) {
7065         for (auto L : It->second) {
7066           generateInfoForComponentList(
7067               /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
7068               BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
7069               /*IsImplicit=*/false);
7070           IsFirstComponentList = false;
7071         }
7072         return;
7073       }
7074     } else if (DevPointersMap.count(VD)) {
7075       BasePointers.push_back({Arg, VD});
7076       Pointers.push_back(Arg);
7077       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
7078       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
7079       return;
7080     }
7081 
7082     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7083     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7084       for (auto L : C->decl_component_lists(VD)) {
7085         assert(L.first == VD &&
7086                "We got information for the wrong declaration??");
7087         assert(!L.second.empty() &&
7088                "Not expecting declaration with no component lists.");
7089         generateInfoForComponentList(
7090             C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
7091             Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
7092         IsFirstComponentList = false;
7093       }
7094 
7095     return;
7096   }
7097 
7098   /// \brief Generate the default map information for a given capture \a CI,
7099   /// record field declaration \a RI and captured value \a CV.
7100   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
7101                               const FieldDecl &RI, llvm::Value *CV,
7102                               MapBaseValuesArrayTy &CurBasePointers,
7103                               MapValuesArrayTy &CurPointers,
7104                               MapValuesArrayTy &CurSizes,
7105                               MapFlagsArrayTy &CurMapTypes) {
7106 
7107     // Do the default mapping.
7108     if (CI.capturesThis()) {
7109       CurBasePointers.push_back(CV);
7110       CurPointers.push_back(CV);
7111       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
7112       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
7113       // Default map type.
7114       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
7115     } else if (CI.capturesVariableByCopy()) {
7116       CurBasePointers.push_back(CV);
7117       CurPointers.push_back(CV);
7118       if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime those captures that are passed by
        // value and are not pointers.
7121         CurMapTypes.push_back(OMP_MAP_LITERAL);
7122         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
7123       } else {
7124         // Pointers are implicitly mapped with a zero size and no flags
7125         // (other than first map that is added for all implicit maps).
7126         CurMapTypes.push_back(0u);
7127         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
7128       }
7129     } else {
7130       assert(CI.capturesVariable() && "Expected captured reference.");
7131       CurBasePointers.push_back(CV);
7132       CurPointers.push_back(CV);
7133 
7134       const ReferenceType *PtrTy =
7135           cast<ReferenceType>(RI.getType().getTypePtr());
7136       QualType ElementType = PtrTy->getPointeeType();
7137       CurSizes.push_back(CGF.getTypeSize(ElementType));
7138       // The default map type for a scalar/complex type is 'to' because by
7139       // default the value doesn't have to be retrieved. For an aggregate
7140       // type, the default is 'tofrom'.
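      // For instance (a sketch of the rule stated above), a captured 'double'
      // ends up with just 'to', while a captured 'struct S' ends up with
      // 'tofrom'.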
7141       CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
7142           CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
7143                                              : OMP_MAP_TO));
7144     }
7145     // Every default map produces a single argument which is a target parameter.
7146     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
7147   }
7148 };
7149 
7150 enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID used when the device was not specified; the runtime
  /// should get it from the environment variables defined in the spec
  /// (e.g. OMP_DEFAULT_DEVICE).
7153   OMP_DEVICEID_UNDEF = -1,
7154 };
7155 } // anonymous namespace
7156 
7157 /// \brief Emit the arrays used to pass the captures and map information to the
7158 /// offloading runtime library. If there is no map or capture information,
7159 /// return nullptr by reference.
7160 static void
7161 emitOffloadingArrays(CodeGenFunction &CGF,
7162                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
7163                      MappableExprsHandler::MapValuesArrayTy &Pointers,
7164                      MappableExprsHandler::MapValuesArrayTy &Sizes,
7165                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
7166                      CGOpenMPRuntime::TargetDataInfo &Info) {
7167   auto &CGM = CGF.CGM;
7168   auto &Ctx = CGF.getContext();
7169 
7170   // Reset the array information.
7171   Info.clearArrayInfo();
7172   Info.NumberOfPtrs = BasePointers.size();
7173 
7174   if (Info.NumberOfPtrs) {
    // Detect whether any capture size requires runtime evaluation; if none
    // does, a constant array can eventually be used for the sizes.
7177     bool hasRuntimeEvaluationCaptureSize = false;
7178     for (auto *S : Sizes)
7179       if (!isa<llvm::Constant>(S)) {
7180         hasRuntimeEvaluationCaptureSize = true;
7181         break;
7182       }
7183 
7184     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
7185     QualType PointerArrayType =
7186         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
7187                                  /*IndexTypeQuals=*/0);
7188 
7189     Info.BasePointersArray =
7190         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
7191     Info.PointersArray =
7192         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
7193 
7194     // If we don't have any VLA types or other types that require runtime
7195     // evaluation, we can use a constant array for the map sizes, otherwise we
7196     // need to fill up the arrays as we do for the pointers.
7197     if (hasRuntimeEvaluationCaptureSize) {
7198       QualType SizeArrayType = Ctx.getConstantArrayType(
7199           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
7200           /*IndexTypeQuals=*/0);
7201       Info.SizesArray =
7202           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
7203     } else {
7204       // We expect all the sizes to be constant, so we collect them to create
7205       // a constant array.
7206       SmallVector<llvm::Constant *, 16> ConstSizes;
7207       for (auto S : Sizes)
7208         ConstSizes.push_back(cast<llvm::Constant>(S));
7209 
7210       auto *SizesArrayInit = llvm::ConstantArray::get(
7211           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
7212       auto *SizesArrayGbl = new llvm::GlobalVariable(
7213           CGM.getModule(), SizesArrayInit->getType(),
7214           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
7215           SizesArrayInit, ".offload_sizes");
7216       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
7217       Info.SizesArray = SizesArrayGbl;
7218     }
7219 
7220     // The map types are always constant so we don't need to generate code to
7221     // fill arrays. Instead, we create an array constant.
7222     llvm::Constant *MapTypesArrayInit =
7223         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
7224     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
7225         CGM.getModule(), MapTypesArrayInit->getType(),
7226         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
7227         MapTypesArrayInit, ".offload_maptypes");
7228     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
7229     Info.MapTypesArray = MapTypesArrayGbl;
7230 
7231     for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
7232       llvm::Value *BPVal = *BasePointers[i];
7233       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
7234           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7235           Info.BasePointersArray, 0, i);
7236       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7237           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
7238       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
7239       CGF.Builder.CreateStore(BPVal, BPAddr);
7240 
7241       if (Info.requiresDevicePointerInfo())
7242         if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
7243           Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
7244 
7245       llvm::Value *PVal = Pointers[i];
7246       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
7247           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7248           Info.PointersArray, 0, i);
7249       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7250           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
7251       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
7252       CGF.Builder.CreateStore(PVal, PAddr);
7253 
7254       if (hasRuntimeEvaluationCaptureSize) {
7255         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
7256             llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
7257             Info.SizesArray,
7258             /*Idx0=*/0,
7259             /*Idx1=*/i);
7260         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
7261         CGF.Builder.CreateStore(
7262             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
7263             SAddr);
7264       }
7265     }
7266   }
7267 }

/// \brief Emit the arguments to be passed to the runtime library based on the
7269 /// arrays of pointers, sizes and map types.
7270 static void emitOffloadingArraysArgument(
7271     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
7272     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
7273     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
7274   auto &CGM = CGF.CGM;
7275   if (Info.NumberOfPtrs) {
7276     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7277         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7278         Info.BasePointersArray,
7279         /*Idx0=*/0, /*Idx1=*/0);
7280     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7281         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
7282         Info.PointersArray,
7283         /*Idx0=*/0,
7284         /*Idx1=*/0);
7285     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7286         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
7287         /*Idx0=*/0, /*Idx1=*/0);
7288     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
7289         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
7290         Info.MapTypesArray,
7291         /*Idx0=*/0,
7292         /*Idx1=*/0);
7293   } else {
7294     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7295     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
7296     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
7297     MapTypesArrayArg =
7298         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
7299   }
7300 }
7301 
7302 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
7303                                      const OMPExecutableDirective &D,
7304                                      llvm::Value *OutlinedFn,
7305                                      llvm::Value *OutlinedFnID,
7306                                      const Expr *IfCond, const Expr *Device) {
7307   if (!CGF.HaveInsertPoint())
7308     return;
7309 
7310   assert(OutlinedFn && "Invalid outlined function!");
7311 
7312   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
7313   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
7314   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
7315   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
7316                                             PrePostActionTy &) {
7317     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
7318   };
7319   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
7320 
7321   CodeGenFunction::OMPTargetDataInfo InputInfo;
7322   llvm::Value *MapTypesArray = nullptr;
7323   // Fill up the pointer arrays and transfer execution to the device.
7324   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
7325                     &MapTypesArray, &CS, RequiresOuterTask,
7326                     &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device ID as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique; it does not need to point to
    // anything in particular. It could be the pointer to the outlined function
    // that implements the target region, but we do not use that so the
    // compiler is not forced to keep it around and can therefore inline the
    // host function if that proves worthwhile during optimization.
7335 
7336     // From this point on, we need to have an ID of the target region defined.
7337     assert(OutlinedFnID && "Invalid outlined function ID!");
7338 
7339     // Emit device ID if any.
7340     llvm::Value *DeviceID;
7341     if (Device) {
7342       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7343                                            CGF.Int64Ty, /*isSigned=*/true);
7344     } else {
7345       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
7346     }
7347 
7348     // Emit the number of elements in the offloading arrays.
7349     llvm::Value *PointerNum =
7350         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
7351 
7352     // Return value of the runtime offloading call.
7353     llvm::Value *Return;
7354 
7355     auto *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
7356     auto *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);
7357 
7358     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region;
    // however, no additional teams can be launched on the device.
7366     //
7367     // __tgt_target_teams() launches a target region with one or more teams,
7368     // each with one or more threads.  This call is required for target
7369     // constructs such as:
7370     //  'target teams'
7371     //  'target' / 'teams'
7372     //  'target teams distribute parallel for'
7373     //  'target parallel'
7374     // and so on.
7375     //
7376     // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
7378     // The outlined functions themselves have runtime calls to
7379     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
7380     // the compiler in emitTeamsCall() and emitParallelCall().
7381     //
7382     // In contrast, on the NVPTX target, the implementation of
7383     // __tgt_target_teams() launches a GPU kernel with the requested number
7384     // of teams and threads so no additional calls to the runtime are required.
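    // For example (illustrative only),
    //   #pragma omp target teams num_teams(4) thread_limit(64)
    // is lowered to a __tgt_target_teams() call with NumTeams = 4 and
    // NumThreads = 64, whereas a plain '#pragma omp target' is lowered to a
    // __tgt_target() call.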
7385     if (NumTeams) {
      // If we have NumTeams defined, this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of whether it has any clauses associated with it. If the
      // user uses teams with no clauses, both values default to a 32-bit zero,
      // which is what is passed to the runtime library.
7392       assert(NumThreads && "Thread limit expression should be available along "
7393                            "with number of teams.");
7394       llvm::Value *OffloadingArgs[] = {DeviceID,
7395                                        OutlinedFnID,
7396                                        PointerNum,
7397                                        InputInfo.BasePointersArray.getPointer(),
7398                                        InputInfo.PointersArray.getPointer(),
7399                                        InputInfo.SizesArray.getPointer(),
7400                                        MapTypesArray,
7401                                        NumTeams,
7402                                        NumThreads};
7403       Return = CGF.EmitRuntimeCall(
7404           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
7405                                           : OMPRTL__tgt_target_teams),
7406           OffloadingArgs);
7407     } else {
7408       llvm::Value *OffloadingArgs[] = {DeviceID,
7409                                        OutlinedFnID,
7410                                        PointerNum,
7411                                        InputInfo.BasePointersArray.getPointer(),
7412                                        InputInfo.PointersArray.getPointer(),
7413                                        InputInfo.SizesArray.getPointer(),
7414                                        MapTypesArray};
7415       Return = CGF.EmitRuntimeCall(
7416           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
7417                                           : OMPRTL__tgt_target),
7418           OffloadingArgs);
7419     }
7420 
7421     // Check the error code and execute the host version if required.
7422     llvm::BasicBlock *OffloadFailedBlock =
7423         CGF.createBasicBlock("omp_offload.failed");
7424     llvm::BasicBlock *OffloadContBlock =
7425         CGF.createBasicBlock("omp_offload.cont");
7426     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
7427     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
7428 
7429     CGF.EmitBlock(OffloadFailedBlock);
7430     if (RequiresOuterTask) {
7431       CapturedVars.clear();
7432       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
7433     }
7434     emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
7435     CGF.EmitBranch(OffloadContBlock);
7436 
7437     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
7438   };
7439 
7440   // Notify that the host version must be executed.
7441   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
7442                     RequiresOuterTask](CodeGenFunction &CGF,
7443                                        PrePostActionTy &) {
7444     if (RequiresOuterTask) {
7445       CapturedVars.clear();
7446       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
7447     }
7448     emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
7449   };
7450 
7451   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
7452                           &CapturedVars, RequiresOuterTask,
7453                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
7454     // Fill up the arrays with all the captured variables.
7455     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
7456     MappableExprsHandler::MapValuesArrayTy Pointers;
7457     MappableExprsHandler::MapValuesArrayTy Sizes;
7458     MappableExprsHandler::MapFlagsArrayTy MapTypes;
7459 
7460     MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
7461     MappableExprsHandler::MapValuesArrayTy CurPointers;
7462     MappableExprsHandler::MapValuesArrayTy CurSizes;
7463     MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
7464 
7465     // Get mappable expression information.
7466     MappableExprsHandler MEHandler(D, CGF);
7467 
7468     auto RI = CS.getCapturedRecordDecl()->field_begin();
7469     auto CV = CapturedVars.begin();
7470     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
7471                                               CE = CS.capture_end();
7472          CI != CE; ++CI, ++RI, ++CV) {
7473       CurBasePointers.clear();
7474       CurPointers.clear();
7475       CurSizes.clear();
7476       CurMapTypes.clear();
7477 
      // VLA sizes are passed to the outlined region by copy and do not have
      // any map information associated with them.
7480       if (CI->capturesVariableArrayType()) {
7481         CurBasePointers.push_back(*CV);
7482         CurPointers.push_back(*CV);
7483         CurSizes.push_back(CGF.getTypeSize(RI->getType()));
7484         // Copy to the device as an argument. No need to retrieve it.
7485         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
7486                               MappableExprsHandler::OMP_MAP_TARGET_PARAM);
7487       } else {
7488         // If we have any information in the map clause, we use it, otherwise we
7489         // just do a default mapping.
7490         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
7491                                          CurSizes, CurMapTypes);
7492         if (CurBasePointers.empty())
7493           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
7494                                            CurPointers, CurSizes, CurMapTypes);
7495       }
7496       // We expect to have at least an element of information for this capture.
7497       assert(!CurBasePointers.empty() &&
7498              "Non-existing map pointer for capture!");
7499       assert(CurBasePointers.size() == CurPointers.size() &&
7500              CurBasePointers.size() == CurSizes.size() &&
7501              CurBasePointers.size() == CurMapTypes.size() &&
7502              "Inconsistent map information sizes!");
7503 
7504       // We need to append the results of this capture to what we already have.
7505       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
7506       Pointers.append(CurPointers.begin(), CurPointers.end());
7507       Sizes.append(CurSizes.begin(), CurSizes.end());
7508       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
7509     }
7510     // Map other list items in the map clause which are not captured variables
7511     // but "declare target link" global variables.
7512     for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
7513       for (auto L : C->component_lists()) {
7514         if (!L.first)
7515           continue;
7516         const auto *VD = dyn_cast<VarDecl>(L.first);
7517         if (!VD)
7518           continue;
7519         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7520             isDeclareTargetDeclaration(VD);
7521         if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
7522           continue;
7523         MEHandler.generateInfoForComponentList(
7524             C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
7525             Pointers, Sizes, MapTypes, /*IsFirstComponentList=*/true,
7526             C->isImplicit());
7527       }
7528     }
7529 
7530     TargetDataInfo Info;
7531     // Fill up the arrays and create the arguments.
7532     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7533     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
7534                                  Info.PointersArray, Info.SizesArray,
7535                                  Info.MapTypesArray, Info);
7536     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
7537     InputInfo.BasePointersArray =
7538         Address(Info.BasePointersArray, CGM.getPointerAlign());
7539     InputInfo.PointersArray =
7540         Address(Info.PointersArray, CGM.getPointerAlign());
7541     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
7542     MapTypesArray = Info.MapTypesArray;
7543     if (RequiresOuterTask)
7544       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
7545     else
7546       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
7547   };
7548 
7549   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
7550                              CodeGenFunction &CGF, PrePostActionTy &) {
7551     if (RequiresOuterTask) {
7552       CodeGenFunction::OMPTargetDataInfo InputInfo;
7553       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
7554     } else {
7555       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
7556     }
7557   };
7558 
  // If we have a target function ID, it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify any target triples.
7563   if (OutlinedFnID) {
7564     if (IfCond) {
7565       emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
7566     } else {
7567       RegionCodeGenTy ThenRCG(TargetThenGen);
7568       ThenRCG(CGF);
7569     }
7570   } else {
7571     RegionCodeGenTy ElseRCG(TargetElseGen);
7572     ElseRCG(CGF);
7573   }
7574 }
7575 
7576 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
7577                                                     StringRef ParentName) {
7578   if (!S)
7579     return;
7580 
7581   // Codegen OMP target directives that offload compute to the device.
7582   bool requiresDeviceCodegen =
7583       isa<OMPExecutableDirective>(S) &&
7584       isOpenMPTargetExecutionDirective(
7585           cast<OMPExecutableDirective>(S)->getDirectiveKind());
7586 
7587   if (requiresDeviceCodegen) {
7588     auto &E = *cast<OMPExecutableDirective>(S);
7589     unsigned DeviceID;
7590     unsigned FileID;
7591     unsigned Line;
7592     getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
7593                              FileID, Line);
7594 
    // Is this a target region that should not be emitted as an entry point? If
    // so, just signal that we are done with this target region.
7597     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
7598                                                             ParentName, Line))
7599       return;
7600 
7601     switch (S->getStmtClass()) {
7602     case Stmt::OMPTargetDirectiveClass:
7603       CodeGenFunction::EmitOMPTargetDeviceFunction(
7604           CGM, ParentName, cast<OMPTargetDirective>(*S));
7605       break;
7606     case Stmt::OMPTargetParallelDirectiveClass:
7607       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7608           CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
7609       break;
7610     case Stmt::OMPTargetTeamsDirectiveClass:
7611       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7612           CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
7613       break;
7614     case Stmt::OMPTargetTeamsDistributeDirectiveClass:
7615       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7616           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S));
7617       break;
7618     case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
7619       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7620           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S));
7621       break;
7622     case Stmt::OMPTargetParallelForDirectiveClass:
7623       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7624           CGM, ParentName, cast<OMPTargetParallelForDirective>(*S));
7625       break;
7626     case Stmt::OMPTargetParallelForSimdDirectiveClass:
7627       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7628           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S));
7629       break;
7630     case Stmt::OMPTargetSimdDirectiveClass:
7631       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
7632           CGM, ParentName, cast<OMPTargetSimdDirective>(*S));
7633       break;
7634     case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
7635       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7636           CGM, ParentName,
7637           cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
7638       break;
7639     case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
7640       CodeGenFunction::
7641           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7642               CGM, ParentName,
7643               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
7644       break;
7645     default:
7646       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
7647     }
7648     return;
7649   }
7650 
7651   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
7652     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
7653       return;
7654 
7655     scanForTargetRegionsFunctions(
7656         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
7657     return;
7658   }
7659 
7660   // If this is a lambda function, look into its body.
7661   if (auto *L = dyn_cast<LambdaExpr>(S))
7662     S = L->getBody();
7663 
7664   // Keep looking for target regions recursively.
7665   for (auto *II : S->children())
7666     scanForTargetRegionsFunctions(II, ParentName);
7667 }
7668 
7669 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
7670   auto &FD = *cast<FunctionDecl>(GD.getDecl());
7671 
7672   // If emitting code for the host, we do not process FD here. Instead we do
7673   // the normal code generation.
7674   if (!CGM.getLangOpts().OpenMPIsDevice)
7675     return false;
7676 
7677   // Try to detect target regions in the function.
7678   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
7679 
  // Do not emit the function if it is not marked as declare target.
7681   return !isDeclareTargetDeclaration(&FD);
7682 }
7683 
7684 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
7685   if (!CGM.getLangOpts().OpenMPIsDevice)
7686     return false;
7687 
7688   // Check if there are Ctors/Dtors in this declaration and look for target
7689   // regions in it. We use the complete variant to produce the kernel name
7690   // mangling.
7691   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
7692   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
7693     for (auto *Ctor : RD->ctors()) {
7694       StringRef ParentName =
7695           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
7696       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
7697     }
7698     auto *Dtor = RD->getDestructor();
7699     if (Dtor) {
7700       StringRef ParentName =
7701           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
7702       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
7703     }
7704   }
7705 
  // Do not emit the variable if it is not marked as declare target.
7707   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7708       isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl()));
7709   return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link;
7710 }
7711 
7712 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
7713                                                    llvm::Constant *Addr) {
7714   if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7715           isDeclareTargetDeclaration(VD)) {
7716     OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
7717     StringRef VarName;
7718     CharUnits VarSize;
7719     llvm::GlobalValue::LinkageTypes Linkage;
7720     switch (*Res) {
7721     case OMPDeclareTargetDeclAttr::MT_To:
7722       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
7723       VarName = CGM.getMangledName(VD);
7724       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
7725       Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
7726       break;
7727     case OMPDeclareTargetDeclAttr::MT_Link:
7728       // Map type 'to' because we do not map the original variable but the
7729       // reference.
7730       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
7731       if (!CGM.getLangOpts().OpenMPIsDevice) {
7732         Addr =
7733             cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
7734       }
7735       VarName = Addr->getName();
7736       VarSize = CGM.getPointerSize();
7737       Linkage = llvm::GlobalValue::WeakAnyLinkage;
7738       break;
7739     }
7740     OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
7741         VarName, Addr, VarSize, Flags, Linkage);
7742   }
7743 }
7744 
7745 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
7746   auto *VD = GD.getDecl();
7747   if (isa<FunctionDecl>(VD))
7748     return emitTargetFunctions(GD);
7749 
7750   return emitTargetGlobalVariable(GD);
7751 }
7752 
7753 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
7754     CodeGenModule &CGM)
7755     : CGM(CGM) {
7756   if (CGM.getLangOpts().OpenMPIsDevice) {
7757     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
7758     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
7759   }
7760 }
7761 
7762 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
7763   if (CGM.getLangOpts().OpenMPIsDevice)
7764     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
7765 }
7766 
7767 bool CGOpenMPRuntime::markAsGlobalTarget(const FunctionDecl *D) {
7768   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
7769     return true;
7770 
7771   const FunctionDecl *FD = D->getCanonicalDecl();
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
7774   if (isDeclareTargetDeclaration(D)) {
7775     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
7776       if (auto *F = dyn_cast_or_null<llvm::Function>(
7777               CGM.GetGlobalValue(CGM.getMangledName(D))))
7778         return !F->isDeclaration();
7779       return false;
7780     }
7781     return true;
7782   }
7783 
  // Do not mark member functions except for static ones.
7785   if (const auto *Method = dyn_cast<CXXMethodDecl>(FD))
7786     if (!Method->isStatic())
7787       return true;
7788 
7789   return !AlreadyEmittedTargetFunctions.insert(FD).second;
7790 }
7791 
7792 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
7793   // If we have offloading in the current module, we need to emit the entries
7794   // now and register the offloading descriptor.
7795   createOffloadEntriesAndInfoMetadata();
7796 
7797   // Create and register the offloading binary descriptors. This is the main
7798   // entity that captures all the information about offloading in the current
7799   // compilation unit.
7800   return createOffloadingBinaryDescriptorRegistration();
7801 }
7802 
7803 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
7804                                     const OMPExecutableDirective &D,
7805                                     SourceLocation Loc,
7806                                     llvm::Value *OutlinedFn,
7807                                     ArrayRef<llvm::Value *> CapturedVars) {
7808   if (!CGF.HaveInsertPoint())
7809     return;
7810 
7811   auto *RTLoc = emitUpdateLocation(CGF, Loc);
7812   CodeGenFunction::RunCleanupsScope Scope(CGF);
7813 
7814   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
7815   llvm::Value *Args[] = {
7816       RTLoc,
7817       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
7818       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
7819   llvm::SmallVector<llvm::Value *, 16> RealArgs;
7820   RealArgs.append(std::begin(Args), std::end(Args));
7821   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
7822 
7823   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
7824   CGF.EmitRuntimeCall(RTLFn, RealArgs);
7825 }
7826 
7827 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
7828                                          const Expr *NumTeams,
7829                                          const Expr *ThreadLimit,
7830                                          SourceLocation Loc) {
7831   if (!CGF.HaveInsertPoint())
7832     return;
7833 
7834   auto *RTLoc = emitUpdateLocation(CGF, Loc);
7835 
7836   llvm::Value *NumTeamsVal =
7837       (NumTeams)
7838           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
7839                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7840           : CGF.Builder.getInt32(0);
7841 
7842   llvm::Value *ThreadLimitVal =
7843       (ThreadLimit)
7844           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
7845                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7846           : CGF.Builder.getInt32(0);
7847 
  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
7849   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
7850                                      ThreadLimitVal};
7851   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
7852                       PushNumTeamsArgs);
7853 }
7854 
7855 void CGOpenMPRuntime::emitTargetDataCalls(
7856     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7857     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
7858   if (!CGF.HaveInsertPoint())
7859     return;
7860 
7861   // Action used to replace the default codegen action and turn privatization
7862   // off.
7863   PrePostActionTy NoPrivAction;
7864 
7865   // Generate the code for the opening of the data environment. Capture all the
7866   // arguments of the runtime call by reference because they are used in the
7867   // closing of the region.
7868   auto &&BeginThenGen = [this, &D, Device, &Info,
7869                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
7870     // Fill up the arrays with all the mapped variables.
7871     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
7872     MappableExprsHandler::MapValuesArrayTy Pointers;
7873     MappableExprsHandler::MapValuesArrayTy Sizes;
7874     MappableExprsHandler::MapFlagsArrayTy MapTypes;
7875 
7876     // Get map clause information.
7877     MappableExprsHandler MCHandler(D, CGF);
7878     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7879 
7880     // Fill up the arrays and create the arguments.
7881     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7882 
7883     llvm::Value *BasePointersArrayArg = nullptr;
7884     llvm::Value *PointersArrayArg = nullptr;
7885     llvm::Value *SizesArrayArg = nullptr;
7886     llvm::Value *MapTypesArrayArg = nullptr;
7887     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7888                                  SizesArrayArg, MapTypesArrayArg, Info);
7889 
7890     // Emit device ID if any.
7891     llvm::Value *DeviceID = nullptr;
7892     if (Device) {
7893       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7894                                            CGF.Int64Ty, /*isSigned=*/true);
7895     } else {
7896       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
7897     }
7898 
7899     // Emit the number of elements in the offloading arrays.
7900     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7901 
7902     llvm::Value *OffloadingArgs[] = {
7903         DeviceID,         PointerNum,    BasePointersArrayArg,
7904         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7905     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
7906                         OffloadingArgs);
7907 
7908     // If device pointer privatization is required, emit the body of the region
7909     // here. It will have to be duplicated: with and without privatization.
7910     if (!Info.CaptureDeviceAddrMap.empty())
7911       CodeGen(CGF);
7912   };
7913 
7914   // Generate code for the closing of the data region.
7915   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
7916                                             PrePostActionTy &) {
7917     assert(Info.isValid() && "Invalid data environment closing arguments.");
7918 
7919     llvm::Value *BasePointersArrayArg = nullptr;
7920     llvm::Value *PointersArrayArg = nullptr;
7921     llvm::Value *SizesArrayArg = nullptr;
7922     llvm::Value *MapTypesArrayArg = nullptr;
7923     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7924                                  SizesArrayArg, MapTypesArrayArg, Info);
7925 
7926     // Emit device ID if any.
7927     llvm::Value *DeviceID = nullptr;
7928     if (Device) {
7929       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7930                                            CGF.Int64Ty, /*isSigned=*/true);
7931     } else {
7932       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
7933     }
7934 
7935     // Emit the number of elements in the offloading arrays.
7936     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7937 
7938     llvm::Value *OffloadingArgs[] = {
7939         DeviceID,         PointerNum,    BasePointersArrayArg,
7940         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7941     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
7942                         OffloadingArgs);
7943   };
7944 
7945   // If we need device pointer privatization, we need to emit the body of the
7946   // region with no privatization in the 'else' branch of the conditional.
7947   // Otherwise, we don't have to do anything.
7948   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
7949                                                          PrePostActionTy &) {
7950     if (!Info.CaptureDeviceAddrMap.empty()) {
7951       CodeGen.setAction(NoPrivAction);
7952       CodeGen(CGF);
7953     }
7954   };
7955 
7956   // We don't have to do anything to close the region if the if clause evaluates
7957   // to false.
7958   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
7959 
7960   if (IfCond) {
7961     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
7962   } else {
7963     RegionCodeGenTy RCG(BeginThenGen);
7964     RCG(CGF);
7965   }
7966 
7967   // If we don't require privatization of device pointers, we emit the body in
7968   // between the runtime calls. This avoids duplicating the body code.
7969   if (Info.CaptureDeviceAddrMap.empty()) {
7970     CodeGen.setAction(NoPrivAction);
7971     CodeGen(CGF);
7972   }
7973 
7974   if (IfCond) {
7975     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
7976   } else {
7977     RegionCodeGenTy RCG(EndThenGen);
7978     RCG(CGF);
7979   }
7980 }
7981 
7982 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
7983     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7984     const Expr *Device) {
7985   if (!CGF.HaveInsertPoint())
7986     return;
7987 
7988   assert((isa<OMPTargetEnterDataDirective>(D) ||
7989           isa<OMPTargetExitDataDirective>(D) ||
7990           isa<OMPTargetUpdateDirective>(D)) &&
7991          "Expecting either target enter, exit data, or update directives.");
7992 
7993   CodeGenFunction::OMPTargetDataInfo InputInfo;
7994   llvm::Value *MapTypesArray = nullptr;
7995   // Generate the code for the opening of the data environment.
7996   auto &&ThenGen = [this, &D, Device, &InputInfo,
7997                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
7998     // Emit device ID if any.
7999     llvm::Value *DeviceID = nullptr;
8000     if (Device) {
8001       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8002                                            CGF.Int64Ty, /*isSigned=*/true);
8003     } else {
8004       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8005     }
8006 
8007     // Emit the number of elements in the offloading arrays.
8008     llvm::Constant *PointerNum =
8009         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8010 
8011     llvm::Value *OffloadingArgs[] = {DeviceID,
8012                                      PointerNum,
8013                                      InputInfo.BasePointersArray.getPointer(),
8014                                      InputInfo.PointersArray.getPointer(),
8015                                      InputInfo.SizesArray.getPointer(),
8016                                      MapTypesArray};
8017 
8018     // Select the right runtime function call for each expected standalone
8019     // directive.
8020     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8021     OpenMPRTLFunction RTLFn;
8022     switch (D.getDirectiveKind()) {
8023     default:
8024       llvm_unreachable("Unexpected standalone target data directive.");
8025       break;
8026     case OMPD_target_enter_data:
8027       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
8028                         : OMPRTL__tgt_target_data_begin;
8029       break;
8030     case OMPD_target_exit_data:
8031       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
8032                         : OMPRTL__tgt_target_data_end;
8033       break;
8034     case OMPD_target_update:
8035       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
8036                         : OMPRTL__tgt_target_data_update;
8037       break;
8038     }
8039     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
8040   };
8041 
8042   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
8043                              CodeGenFunction &CGF, PrePostActionTy &) {
8044     // Fill up the arrays with all the mapped variables.
8045     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8046     MappableExprsHandler::MapValuesArrayTy Pointers;
8047     MappableExprsHandler::MapValuesArrayTy Sizes;
8048     MappableExprsHandler::MapFlagsArrayTy MapTypes;
8049 
8050     // Get map clause information.
8051     MappableExprsHandler MEHandler(D, CGF);
8052     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
8053 
8054     TargetDataInfo Info;
8055     // Fill up the arrays and create the arguments.
8056     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8057     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
8058                                  Info.PointersArray, Info.SizesArray,
8059                                  Info.MapTypesArray, Info);
8060     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8061     InputInfo.BasePointersArray =
8062         Address(Info.BasePointersArray, CGM.getPointerAlign());
8063     InputInfo.PointersArray =
8064         Address(Info.PointersArray, CGM.getPointerAlign());
8065     InputInfo.SizesArray =
8066         Address(Info.SizesArray, CGM.getPointerAlign());
8067     MapTypesArray = Info.MapTypesArray;
8068     if (D.hasClausesOfKind<OMPDependClause>())
8069       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8070     else
8071       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8072   };
8073 
8074   if (IfCond)
8075     emitOMPIfClause(CGF, IfCond, TargetThenGen,
8076                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
8077   else {
8078     RegionCodeGenTy ThenRCG(TargetThenGen);
8079     ThenRCG(CGF);
8080   }
8081 }
8082 
8083 namespace {
8084   /// Kind of parameter in a function with 'declare simd' directive.
8085   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
8086   /// Attribute set of the parameter.
8087   struct ParamAttrTy {
8088     ParamKindTy Kind = Vector;
8089     llvm::APSInt StrideOrArg;
8090     llvm::APSInt Alignment;
8091   };
8092 } // namespace
8093 
8094 static unsigned evaluateCDTSize(const FunctionDecl *FD,
8095                                 ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // The CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //   type which is passed by value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
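  // Worked example (illustrative only): for 'double foo(double x)' with no
  // 'simdlen' clause, the CDT is 'double' (64 bits on x86), so targeting a
  // 256-bit vector register (AVX/AVX2) yields VLEN = 256 / 64 = 4.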
8115   QualType RetType = FD->getReturnType();
8116   if (RetType.isNull())
8117     return 0;
8118   ASTContext &C = FD->getASTContext();
8119   QualType CDT;
8120   if (!RetType.isNull() && !RetType->isVoidType())
8121     CDT = RetType;
8122   else {
8123     unsigned Offset = 0;
8124     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
8125       if (ParamAttrs[Offset].Kind == Vector)
8126         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
8127       ++Offset;
8128     }
8129     if (CDT.isNull()) {
8130       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
8131         if (ParamAttrs[I + Offset].Kind == Vector) {
8132           CDT = FD->getParamDecl(I)->getType();
8133           break;
8134         }
8135       }
8136     }
8137   }
8138   if (CDT.isNull())
8139     CDT = C.IntTy;
8140   CDT = CDT->getCanonicalTypeUnqualified();
8141   if (CDT->isRecordType() || CDT->isUnionType())
8142     CDT = C.IntTy;
8143   return C.getTypeSize(CDT);
8144 }
8145 
8146 static void
8147 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
8148                            const llvm::APSInt &VLENVal,
8149                            ArrayRef<ParamAttrTy> ParamAttrs,
8150                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
8151   struct ISADataTy {
8152     char ISA;
8153     unsigned VecRegSize;
8154   };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
8169   llvm::SmallVector<char, 2> Masked;
8170   switch (State) {
8171   case OMPDeclareSimdDeclAttr::BS_Undefined:
8172     Masked.push_back('N');
8173     Masked.push_back('M');
8174     break;
8175   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
8176     Masked.push_back('N');
8177     break;
8178   case OMPDeclareSimdDeclAttr::BS_Inbranch:
8179     Masked.push_back('M');
8180     break;
8181   }
8182   for (auto Mask : Masked) {
8183     for (auto &Data : ISAData) {
8184       SmallString<256> Buffer;
8185       llvm::raw_svector_ostream Out(Buffer);
8186       Out << "_ZGV" << Data.ISA << Mask;
8187       if (!VLENVal) {
8188         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
8189                                          evaluateCDTSize(FD, ParamAttrs));
8190       } else
8191         Out << VLENVal;
8192       for (auto &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
8194         case LinearWithVarStride:
8195           Out << 's' << ParamAttr.StrideOrArg;
8196           break;
8197         case Linear:
8198           Out << 'l';
8199           if (!!ParamAttr.StrideOrArg)
8200             Out << ParamAttr.StrideOrArg;
8201           break;
8202         case Uniform:
8203           Out << 'u';
8204           break;
8205         case Vector:
8206           Out << 'v';
8207           break;
8208         }
8209         if (!!ParamAttr.Alignment)
8210           Out << 'a' << ParamAttr.Alignment;
8211       }
8212       Out << '_' << Fn->getName();
8213       Fn->addFnAttr(Out.str());
8214     }
8215   }
8216 }
8217 
8218 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
8219                                               llvm::Function *Fn) {
8220   ASTContext &C = CGM.getContext();
8221   FD = FD->getMostRecentDecl();
8222   // Map params to their positions in function decl.
8223   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
8224   if (isa<CXXMethodDecl>(FD))
8225     ParamPositions.insert({FD, 0});
8226   unsigned ParamPos = ParamPositions.size();
8227   for (auto *P : FD->parameters()) {
8228     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
8229     ++ParamPos;
8230   }
8231   while (FD) {
8232     for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
8233       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
8234       // Mark uniform parameters.
8235       for (auto *E : Attr->uniforms()) {
8236         E = E->IgnoreParenImpCasts();
8237         unsigned Pos;
8238         if (isa<CXXThisExpr>(E))
8239           Pos = ParamPositions[FD];
8240         else {
8241           auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8242                           ->getCanonicalDecl();
8243           Pos = ParamPositions[PVD];
8244         }
8245         ParamAttrs[Pos].Kind = Uniform;
8246       }
8247       // Get alignment info.
8248       auto NI = Attr->alignments_begin();
8249       for (auto *E : Attr->aligneds()) {
8250         E = E->IgnoreParenImpCasts();
8251         unsigned Pos;
8252         QualType ParmTy;
8253         if (isa<CXXThisExpr>(E)) {
8254           Pos = ParamPositions[FD];
8255           ParmTy = E->getType();
8256         } else {
8257           auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8258                           ->getCanonicalDecl();
8259           Pos = ParamPositions[PVD];
8260           ParmTy = PVD->getType();
8261         }
8262         ParamAttrs[Pos].Alignment =
8263             (*NI)
8264                 ? (*NI)->EvaluateKnownConstInt(C)
8265                 : llvm::APSInt::getUnsigned(
8266                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
8267                           .getQuantity());
8268         ++NI;
8269       }
8270       // Mark linear parameters.
8271       auto SI = Attr->steps_begin();
8272       auto MI = Attr->modifiers_begin();
8273       for (auto *E : Attr->linears()) {
8274         E = E->IgnoreParenImpCasts();
8275         unsigned Pos;
8276         if (isa<CXXThisExpr>(E))
8277           Pos = ParamPositions[FD];
8278         else {
8279           auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
8280                           ->getCanonicalDecl();
8281           Pos = ParamPositions[PVD];
8282         }
8283         auto &ParamAttr = ParamAttrs[Pos];
8284         ParamAttr.Kind = Linear;
8285         if (*SI) {
8286           if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
8287                                     Expr::SE_AllowSideEffects)) {
            // The step need not reference a parameter; use dyn_cast so that a
            // non-matching expression is simply skipped instead of asserting.
            if (auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
8290                 ParamAttr.Kind = LinearWithVarStride;
8291                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
8292                     ParamPositions[StridePVD->getCanonicalDecl()]);
8293               }
8294             }
8295           }
8296         }
8297         ++SI;
8298         ++MI;
8299       }
8300       llvm::APSInt VLENVal;
8301       if (const Expr *VLEN = Attr->getSimdlen())
8302         VLENVal = VLEN->EvaluateKnownConstInt(C);
8303       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
8304       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
8305           CGM.getTriple().getArch() == llvm::Triple::x86_64)
8306         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
8307     }
8308     FD = FD->getPreviousDecl();
8309   }
8310 }
8311 
8312 namespace {
8313 /// Cleanup action for doacross support.
8314 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
8315 public:
8316   static const int DoacrossFinArgs = 2;
8317 
8318 private:
8319   llvm::Value *RTLFn;
8320   llvm::Value *Args[DoacrossFinArgs];
8321 
8322 public:
8323   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
8324       : RTLFn(RTLFn) {
8325     assert(CallArgs.size() == DoacrossFinArgs);
8326     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
8327   }
8328   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
8329     if (!CGF.HaveInsertPoint())
8330       return;
8331     CGF.EmitRuntimeCall(RTLFn, Args);
8332   }
8333 };
8334 } // namespace
8335 
8336 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
8337                                        const OMPLoopDirective &D) {
8338   if (!CGF.HaveInsertPoint())
8339     return;
8340 
8341   ASTContext &C = CGM.getContext();
8342   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8343   RecordDecl *RD;
8344   if (KmpDimTy.isNull()) {
8345     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
8346     //  kmp_int64 lo; // lower
8347     //  kmp_int64 up; // upper
8348     //  kmp_int64 st; // stride
8349     // };
8350     RD = C.buildImplicitRecord("kmp_dim");
8351     RD->startDefinition();
8352     addFieldToRecordDecl(C, RD, Int64Ty);
8353     addFieldToRecordDecl(C, RD, Int64Ty);
8354     addFieldToRecordDecl(C, RD, Int64Ty);
8355     RD->completeDefinition();
8356     KmpDimTy = C.getRecordType(RD);
8357   } else
8358     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
8359 
8360   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
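  // Zero-initialize the descriptor; the lower bound stays 0, and only the
  // upper bound and the stride are filled in explicitly below.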
8361   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
8362   enum { LowerFD = 0, UpperFD, StrideFD };
8363   // Fill dims with data.
8364   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
8365   // dims.upper = num_iterations;
8366   LValue UpperLVal =
8367       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
8368   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
8369       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
8370       Int64Ty, D.getNumIterations()->getExprLoc());
8371   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
8372   // dims.stride = 1;
8373   LValue StrideLVal =
8374       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
8375   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
8376                         StrideLVal);
8377 
8378   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
8379   // kmp_int32 num_dims, struct kmp_dim * dims);
8380   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
8381                          getThreadID(CGF, D.getLocStart()),
8382                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
8383                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8384                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
8385 
8386   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
8387   CGF.EmitRuntimeCall(RTLFn, Args);
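  // Register a cleanup so that __kmpc_doacross_fini is emitted on both the
  // normal and the exceptional exits from the region.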
8388   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
8389       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
8390   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
8391   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
8392                                              llvm::makeArrayRef(FiniArgs));
8393 }
8394 
8395 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
8396                                           const OMPDependClause *C) {
8397   QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8399   const Expr *CounterVal = C->getCounterValue();
  assert(CounterVal && "No counter value for doacross 'depend' clause.");
8401   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
8402                                                  CounterVal->getType(), Int64Ty,
8403                                                  CounterVal->getExprLoc());
8404   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
8405   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
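  // Build call __kmpc_doacross_post(loc, gtid, vec) for 'source', or
  // __kmpc_doacross_wait(loc, gtid, vec) for 'sink', where vec points to the
  // 64-bit iteration counter.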
8406   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
8407                          getThreadID(CGF, C->getLocStart()),
8408                          CntAddr.getPointer()};
8409   llvm::Value *RTLFn;
8410   if (C->getDependencyKind() == OMPC_DEPEND_source)
8411     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
8412   else {
8413     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
8414     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
8415   }
8416   CGF.EmitRuntimeCall(RTLFn, Args);
8417 }
8418 
8419 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
8420                                llvm::Value *Callee,
8421                                ArrayRef<llvm::Value *> Args) const {
8422   assert(Loc.isValid() && "Outlined function call location must be valid.");
8423   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
8424 
8425   if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
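    // Use the nounwind call emission for callees known not to throw;
    // everything else goes through the generic runtime-call path below.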
8426     if (Fn->doesNotThrow()) {
8427       CGF.EmitNounwindRuntimeCall(Fn, Args);
8428       return;
8429     }
8430   }
8431   CGF.EmitRuntimeCall(Callee, Args);
8432 }
8433 
8434 void CGOpenMPRuntime::emitOutlinedFunctionCall(
8435     CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
8436     ArrayRef<llvm::Value *> Args) const {
8437   emitCall(CGF, Loc, OutlinedFn, Args);
8438 }
8439 
8440 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
8441                                              const VarDecl *NativeParam,
8442                                              const VarDecl *TargetParam) const {
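  // By default the native parameter is used directly; other runtimes may
  // override this to remap translated parameters.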
8443   return CGF.GetAddrOfLocalVar(NativeParam);
8444 }
8445 
8446 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
8447                                                    const VarDecl *VD) {
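  // An invalid address signals that no runtime-specific storage is needed and
  // the variable is allocated as usual.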
8448   return Address::invalid();
8449 }
8450 
8451 llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
8452     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
8453     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
8454   llvm_unreachable("Not supported in SIMD-only mode");
8455 }
8456 
8457 llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
8458     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
8459     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
8460   llvm_unreachable("Not supported in SIMD-only mode");
8461 }
8462 
8463 llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
8464     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
8465     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
8466     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
8467     bool Tied, unsigned &NumberOfParts) {
8468   llvm_unreachable("Not supported in SIMD-only mode");
8469 }
8470 
8471 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
8472                                            SourceLocation Loc,
8473                                            llvm::Value *OutlinedFn,
8474                                            ArrayRef<llvm::Value *> CapturedVars,
8475                                            const Expr *IfCond) {
8476   llvm_unreachable("Not supported in SIMD-only mode");
8477 }
8478 
8479 void CGOpenMPSIMDRuntime::emitCriticalRegion(
8480     CodeGenFunction &CGF, StringRef CriticalName,
8481     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
8482     const Expr *Hint) {
8483   llvm_unreachable("Not supported in SIMD-only mode");
8484 }
8485 
8486 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
8487                                            const RegionCodeGenTy &MasterOpGen,
8488                                            SourceLocation Loc) {
8489   llvm_unreachable("Not supported in SIMD-only mode");
8490 }
8491 
8492 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
8493                                             SourceLocation Loc) {
8494   llvm_unreachable("Not supported in SIMD-only mode");
8495 }
8496 
8497 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
8498     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
8499     SourceLocation Loc) {
8500   llvm_unreachable("Not supported in SIMD-only mode");
8501 }
8502 
8503 void CGOpenMPSIMDRuntime::emitSingleRegion(
8504     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
8505     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
8506     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
8507     ArrayRef<const Expr *> AssignmentOps) {
8508   llvm_unreachable("Not supported in SIMD-only mode");
8509 }
8510 
8511 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
8512                                             const RegionCodeGenTy &OrderedOpGen,
8513                                             SourceLocation Loc,
8514                                             bool IsThreads) {
8515   llvm_unreachable("Not supported in SIMD-only mode");
8516 }
8517 
8518 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
8519                                           SourceLocation Loc,
8520                                           OpenMPDirectiveKind Kind,
8521                                           bool EmitChecks,
8522                                           bool ForceSimpleCall) {
8523   llvm_unreachable("Not supported in SIMD-only mode");
8524 }
8525 
8526 void CGOpenMPSIMDRuntime::emitForDispatchInit(
8527     CodeGenFunction &CGF, SourceLocation Loc,
8528     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
8529     bool Ordered, const DispatchRTInput &DispatchValues) {
8530   llvm_unreachable("Not supported in SIMD-only mode");
8531 }
8532 
8533 void CGOpenMPSIMDRuntime::emitForStaticInit(
8534     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
8535     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
8536   llvm_unreachable("Not supported in SIMD-only mode");
8537 }
8538 
8539 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
8540     CodeGenFunction &CGF, SourceLocation Loc,
8541     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
8542   llvm_unreachable("Not supported in SIMD-only mode");
8543 }
8544 
8545 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
8546                                                      SourceLocation Loc,
8547                                                      unsigned IVSize,
8548                                                      bool IVSigned) {
8549   llvm_unreachable("Not supported in SIMD-only mode");
8550 }
8551 
8552 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
8553                                               SourceLocation Loc,
8554                                               OpenMPDirectiveKind DKind) {
8555   llvm_unreachable("Not supported in SIMD-only mode");
8556 }
8557 
8558 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
8559                                               SourceLocation Loc,
8560                                               unsigned IVSize, bool IVSigned,
8561                                               Address IL, Address LB,
8562                                               Address UB, Address ST) {
8563   llvm_unreachable("Not supported in SIMD-only mode");
8564 }
8565 
8566 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
8567                                                llvm::Value *NumThreads,
8568                                                SourceLocation Loc) {
8569   llvm_unreachable("Not supported in SIMD-only mode");
8570 }
8571 
8572 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
8573                                              OpenMPProcBindClauseKind ProcBind,
8574                                              SourceLocation Loc) {
8575   llvm_unreachable("Not supported in SIMD-only mode");
8576 }
8577 
8578 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
8579                                                     const VarDecl *VD,
8580                                                     Address VDAddr,
8581                                                     SourceLocation Loc) {
8582   llvm_unreachable("Not supported in SIMD-only mode");
8583 }
8584 
8585 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
8586     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
8587     CodeGenFunction *CGF) {
8588   llvm_unreachable("Not supported in SIMD-only mode");
8589 }
8590 
8591 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
8592     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
8593   llvm_unreachable("Not supported in SIMD-only mode");
8594 }
8595 
8596 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
8597                                     ArrayRef<const Expr *> Vars,
8598                                     SourceLocation Loc) {
8599   llvm_unreachable("Not supported in SIMD-only mode");
8600 }
8601 
8602 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
8603                                        const OMPExecutableDirective &D,
8604                                        llvm::Value *TaskFunction,
8605                                        QualType SharedsTy, Address Shareds,
8606                                        const Expr *IfCond,
8607                                        const OMPTaskDataTy &Data) {
8608   llvm_unreachable("Not supported in SIMD-only mode");
8609 }
8610 
8611 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
8612     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
8613     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
8614     const Expr *IfCond, const OMPTaskDataTy &Data) {
8615   llvm_unreachable("Not supported in SIMD-only mode");
8616 }
8617 
8618 void CGOpenMPSIMDRuntime::emitReduction(
8619     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
8620     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
8621     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
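  // Only simple reductions are expected in SIMD-only mode; reuse the default
  // lowering.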
8622   assert(Options.SimpleReduction && "Only simple reduction is expected.");
8623   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
8624                                  ReductionOps, Options);
8625 }
8626 
8627 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
8628     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
8629     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
8630   llvm_unreachable("Not supported in SIMD-only mode");
8631 }
8632 
8633 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
8634                                                   SourceLocation Loc,
8635                                                   ReductionCodeGen &RCG,
8636                                                   unsigned N) {
8637   llvm_unreachable("Not supported in SIMD-only mode");
8638 }
8639 
8640 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
8641                                                   SourceLocation Loc,
8642                                                   llvm::Value *ReductionsPtr,
8643                                                   LValue SharedLVal) {
8644   llvm_unreachable("Not supported in SIMD-only mode");
8645 }
8646 
8647 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
8648                                            SourceLocation Loc) {
8649   llvm_unreachable("Not supported in SIMD-only mode");
8650 }
8651 
8652 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
8653     CodeGenFunction &CGF, SourceLocation Loc,
8654     OpenMPDirectiveKind CancelRegion) {
8655   llvm_unreachable("Not supported in SIMD-only mode");
8656 }
8657 
8658 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
8659                                          SourceLocation Loc, const Expr *IfCond,
8660                                          OpenMPDirectiveKind CancelRegion) {
8661   llvm_unreachable("Not supported in SIMD-only mode");
8662 }
8663 
8664 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
8665     const OMPExecutableDirective &D, StringRef ParentName,
8666     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
8667     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
8668   llvm_unreachable("Not supported in SIMD-only mode");
8669 }
8670 
8671 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
8672                                          const OMPExecutableDirective &D,
8673                                          llvm::Value *OutlinedFn,
8674                                          llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
8676   llvm_unreachable("Not supported in SIMD-only mode");
8677 }
8678 
8679 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
8680   llvm_unreachable("Not supported in SIMD-only mode");
8681 }
8682 
8683 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
8684   llvm_unreachable("Not supported in SIMD-only mode");
8685 }
8686 
8687 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
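  // Nothing special to do; returning false lets the global be emitted
  // normally.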
8688   return false;
8689 }
8690 
8691 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
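  // No offload registration function is required in SIMD-only mode.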
8692   return nullptr;
8693 }
8694 
8695 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
8696                                         const OMPExecutableDirective &D,
8697                                         SourceLocation Loc,
8698                                         llvm::Value *OutlinedFn,
8699                                         ArrayRef<llvm::Value *> CapturedVars) {
8700   llvm_unreachable("Not supported in SIMD-only mode");
8701 }
8702 
8703 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
8704                                              const Expr *NumTeams,
8705                                              const Expr *ThreadLimit,
8706                                              SourceLocation Loc) {
8707   llvm_unreachable("Not supported in SIMD-only mode");
8708 }
8709 
8710 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
8711     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8712     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
8713   llvm_unreachable("Not supported in SIMD-only mode");
8714 }
8715 
8716 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
8717     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
8718     const Expr *Device) {
8719   llvm_unreachable("Not supported in SIMD-only mode");
8720 }
8721 
8722 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
8723                                            const OMPLoopDirective &D) {
8724   llvm_unreachable("Not supported in SIMD-only mode");
8725 }
8726 
8727 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
8728                                               const OMPDependClause *C) {
8729   llvm_unreachable("Not supported in SIMD-only mode");
8730 }
8731 
8732 const VarDecl *
8733 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
8734                                         const VarDecl *NativeParam) const {
8735   llvm_unreachable("Not supported in SIMD-only mode");
8736 }
8737 
8738 Address
8739 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
8740                                          const VarDecl *NativeParam,
8741                                          const VarDecl *TargetParam) const {
8742   llvm_unreachable("Not supported in SIMD-only mode");
8743 }
8744 
8745