1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/BitcodeReader.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 
30 using namespace clang;
31 using namespace CodeGen;
32 
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50 
51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57 
58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63 
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67 
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70 
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74 
75   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
76 
77   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
78 
79   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
80 
81   bool hasCancel() const { return HasCancel; }
82 
83   static bool classof(const CGCapturedStmtInfo *Info) {
84     return Info->getKind() == CR_OpenMP;
85   }
86 
87   ~CGOpenMPRegionInfo() override = default;
88 
89 protected:
90   CGOpenMPRegionKind RegionKind;
91   RegionCodeGenTy CodeGen;
92   OpenMPDirectiveKind Kind;
93   bool HasCancel;
94 };
95 
96 /// \brief API for captured statement code generation in OpenMP constructs.
97 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
98 public:
99   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
100                              const RegionCodeGenTy &CodeGen,
101                              OpenMPDirectiveKind Kind, bool HasCancel)
102       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
103                            HasCancel),
104         ThreadIDVar(ThreadIDVar) {
105     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
106   }
107 
108   /// \brief Get a variable or parameter for storing global thread id
109   /// inside OpenMP construct.
110   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
111 
112   /// \brief Get the name of the capture helper.
113   StringRef getHelperName() const override { return ".omp_outlined."; }
114 
115   static bool classof(const CGCapturedStmtInfo *Info) {
116     return CGOpenMPRegionInfo::classof(Info) &&
117            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
118                ParallelOutlinedRegion;
119   }
120 
121 private:
122   /// \brief A variable or parameter storing global thread id for OpenMP
123   /// constructs.
124   const VarDecl *ThreadIDVar;
125 };
126 
127 /// \brief API for captured statement code generation in OpenMP constructs.
128 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
129 public:
130   class UntiedTaskActionTy final : public PrePostActionTy {
131     bool Untied;
132     const VarDecl *PartIDVar;
133     const RegionCodeGenTy UntiedCodeGen;
134     llvm::SwitchInst *UntiedSwitch = nullptr;
135 
136   public:
137     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
138                        const RegionCodeGenTy &UntiedCodeGen)
139         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
140     void Enter(CodeGenFunction &CGF) override {
141       if (Untied) {
142         // Emit task switching point.
143         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
144             CGF.GetAddrOfLocalVar(PartIDVar),
145             PartIDVar->getType()->castAs<PointerType>());
146         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
147         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
148         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
149         CGF.EmitBlock(DoneBB);
150         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
151         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
152         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
153                               CGF.Builder.GetInsertBlock());
154         emitUntiedSwitch(CGF);
155       }
156     }
157     void emitUntiedSwitch(CodeGenFunction &CGF) const {
158       if (Untied) {
159         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
163                               PartIdLVal);
164         UntiedCodeGen(CGF);
165         CodeGenFunction::JumpDest CurPoint =
166             CGF.getJumpDestInCurrentScope(".untied.next.");
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
170                               CGF.Builder.GetInsertBlock());
171         CGF.EmitBranchThroughCleanup(CurPoint);
172         CGF.EmitBlock(CurPoint.getBlock());
173       }
174     }
175     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
176   };
177   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
178                                  const VarDecl *ThreadIDVar,
179                                  const RegionCodeGenTy &CodeGen,
180                                  OpenMPDirectiveKind Kind, bool HasCancel,
181                                  const UntiedTaskActionTy &Action)
182       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
183         ThreadIDVar(ThreadIDVar), Action(Action) {
184     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
185   }
186 
187   /// \brief Get a variable or parameter for storing global thread id
188   /// inside OpenMP construct.
189   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
190 
191   /// \brief Get an LValue for the current ThreadID variable.
192   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
193 
194   /// \brief Get the name of the capture helper.
195   StringRef getHelperName() const override { return ".omp_outlined."; }
196 
197   void emitUntiedSwitch(CodeGenFunction &CGF) override {
198     Action.emitUntiedSwitch(CGF);
199   }
200 
201   static bool classof(const CGCapturedStmtInfo *Info) {
202     return CGOpenMPRegionInfo::classof(Info) &&
203            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
204                TaskOutlinedRegion;
205   }
206 
207 private:
208   /// \brief A variable or parameter storing global thread id for OpenMP
209   /// constructs.
210   const VarDecl *ThreadIDVar;
211   /// Action for emitting code for untied tasks.
212   const UntiedTaskActionTy &Action;
213 };
214 
215 /// \brief API for inlined captured statement code generation in OpenMP
216 /// constructs.
217 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
218 public:
219   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
220                             const RegionCodeGenTy &CodeGen,
221                             OpenMPDirectiveKind Kind, bool HasCancel)
222       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
223         OldCSI(OldCSI),
224         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
225 
226   // \brief Retrieve the value of the context parameter.
227   llvm::Value *getContextValue() const override {
228     if (OuterRegionInfo)
229       return OuterRegionInfo->getContextValue();
230     llvm_unreachable("No context value for inlined OpenMP region");
231   }
232 
233   void setContextValue(llvm::Value *V) override {
234     if (OuterRegionInfo) {
235       OuterRegionInfo->setContextValue(V);
236       return;
237     }
238     llvm_unreachable("No context value for inlined OpenMP region");
239   }
240 
241   /// \brief Lookup the captured field decl for a variable.
242   const FieldDecl *lookup(const VarDecl *VD) const override {
243     if (OuterRegionInfo)
244       return OuterRegionInfo->lookup(VD);
245     // If there is no outer outlined region,no need to lookup in a list of
246     // captured variables, we can use the original one.
247     return nullptr;
248   }
249 
250   FieldDecl *getThisFieldDecl() const override {
251     if (OuterRegionInfo)
252       return OuterRegionInfo->getThisFieldDecl();
253     return nullptr;
254   }
255 
256   /// \brief Get a variable or parameter for storing global thread id
257   /// inside OpenMP construct.
258   const VarDecl *getThreadIDVariable() const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->getThreadIDVariable();
261     return nullptr;
262   }
263 
264   /// \brief Get the name of the capture helper.
265   StringRef getHelperName() const override {
266     if (auto *OuterRegionInfo = getOldCSI())
267       return OuterRegionInfo->getHelperName();
268     llvm_unreachable("No helper name for inlined OpenMP construct");
269   }
270 
271   void emitUntiedSwitch(CodeGenFunction &CGF) override {
272     if (OuterRegionInfo)
273       OuterRegionInfo->emitUntiedSwitch(CGF);
274   }
275 
276   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
277 
278   static bool classof(const CGCapturedStmtInfo *Info) {
279     return CGOpenMPRegionInfo::classof(Info) &&
280            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
281   }
282 
283   ~CGOpenMPInlinedRegionInfo() override = default;
284 
285 private:
286   /// \brief CodeGen info about outer OpenMP region.
287   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
288   CGOpenMPRegionInfo *OuterRegionInfo;
289 };
290 
291 /// \brief API for captured statement code generation in OpenMP target
292 /// constructs. For this captures, implicit parameters are used instead of the
293 /// captured fields. The name of the target region has to be unique in a given
294 /// application so it is provided by the client, because only the client has
295 /// the information to generate that.
296 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
297 public:
298   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
299                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
300       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
301                            /*HasCancel=*/false),
302         HelperName(HelperName) {}
303 
304   /// \brief This is unused for target regions because each starts executing
305   /// with a single thread.
306   const VarDecl *getThreadIDVariable() const override { return nullptr; }
307 
308   /// \brief Get the name of the capture helper.
309   StringRef getHelperName() const override { return HelperName; }
310 
311   static bool classof(const CGCapturedStmtInfo *Info) {
312     return CGOpenMPRegionInfo::classof(Info) &&
313            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
314   }
315 
316 private:
317   StringRef HelperName;
318 };
319 
320 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
321   llvm_unreachable("No codegen for expressions");
322 }
323 /// \brief API for generation of expressions captured in a innermost OpenMP
324 /// region.
325 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
326 public:
327   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
328       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
329                                   OMPD_unknown,
330                                   /*HasCancel=*/false),
331         PrivScope(CGF) {
332     // Make sure the globals captured in the provided statement are local by
333     // using the privatization logic. We assume the same variable is not
334     // captured more than once.
335     for (auto &C : CS.captures()) {
336       if (!C.capturesVariable() && !C.capturesVariableByCopy())
337         continue;
338 
339       const VarDecl *VD = C.getCapturedVar();
340       if (VD->isLocalVarDeclOrParm())
341         continue;
342 
343       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
344                       /*RefersToEnclosingVariableOrCapture=*/false,
345                       VD->getType().getNonReferenceType(), VK_LValue,
346                       SourceLocation());
347       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
348         return CGF.EmitLValue(&DRE).getAddress();
349       });
350     }
351     (void)PrivScope.Privatize();
352   }
353 
354   /// \brief Lookup the captured field decl for a variable.
355   const FieldDecl *lookup(const VarDecl *VD) const override {
356     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
357       return FD;
358     return nullptr;
359   }
360 
361   /// \brief Emit the captured statement body.
362   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
363     llvm_unreachable("No body for expressions");
364   }
365 
366   /// \brief Get a variable or parameter for storing global thread id
367   /// inside OpenMP construct.
368   const VarDecl *getThreadIDVariable() const override {
369     llvm_unreachable("No thread id for expressions");
370   }
371 
372   /// \brief Get the name of the capture helper.
373   StringRef getHelperName() const override {
374     llvm_unreachable("No helper name for expressions");
375   }
376 
377   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
378 
379 private:
380   /// Private scope to capture global variables.
381   CodeGenFunction::OMPPrivateScope PrivScope;
382 };
383 
384 /// \brief RAII for emitting code of OpenMP constructs.
385 class InlinedOpenMPRegionRAII {
386   CodeGenFunction &CGF;
387   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
388   FieldDecl *LambdaThisCaptureField = nullptr;
389 
390 public:
391   /// \brief Constructs region for combined constructs.
392   /// \param CodeGen Code generation sequence for combined directives. Includes
393   /// a list of functions used for code generation of implicitly inlined
394   /// regions.
395   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
396                           OpenMPDirectiveKind Kind, bool HasCancel)
397       : CGF(CGF) {
398     // Start emission for the construct.
399     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
400         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
401     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
402     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
403     CGF.LambdaThisCaptureField = nullptr;
404   }
405 
406   ~InlinedOpenMPRegionRAII() {
407     // Restore original CapturedStmtInfo only if we're done with code emission.
408     auto *OldCSI =
409         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
410     delete CGF.CapturedStmtInfo;
411     CGF.CapturedStmtInfo = OldCSI;
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
414   }
415 };
416 
/// \brief Bit flags stored in the 'flags' field of ident_t.
/// All enumerators are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x0001,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x0002,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x0010,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x0020,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x0040,
  /// \brief Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140
};
438 
/// \brief Indices of the fields of the ident structure that describes a
/// source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief Field 'reserved_1': might be used in Fortran.
  IdentField_Reserved_1,
  /// \brief Field 'flags': OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies
  /// this union member.
  IdentField_Flags,
  /// \brief Field 'reserved_2': not really used in Fortran any more.
  IdentField_Reserved_2,
  /// \brief Field 'reserved_3': source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// \brief Field 'psource': string describing the source location. The
  /// string is composed of semi-colon separated fields which describe the
  /// source file, the function and a pair of line numbers that delimit the
  /// construct.
  IdentField_PSource
};
479 
/// \brief Schedule kinds for 'omp for' loops. The enumerators mirror the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// \brief Support for the OpenMP 4.5 monotonic and nonmonotonic schedule
  /// modifiers: set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// \brief Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
511 
/// \brief Identifiers of the runtime library entry points referenced in this
/// file. Each enumerator is documented with the signature of the function it
/// stands for.
enum OpenMPRTLFunction {
  /// \brief void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief void __kmpc_threadprivate_register(ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// \brief kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// \brief void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// \brief void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// \brief void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// \brief kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// \brief void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// \brief void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// \brief void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// \brief void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// \brief void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// \brief void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// \brief kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// \brief void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// \brief kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// \brief kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// \brief void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// \brief kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// \brief kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// \brief void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// \brief kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// \brief kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// \brief void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// \brief void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// \brief void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// \brief void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// \brief void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// \brief void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// \brief kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// \brief void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// \brief void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// \brief void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// \brief kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// \brief void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// \brief kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// \brief kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// \brief void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// \brief void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// \brief void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// \brief void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// \brief void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// \brief void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// \brief void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  /// \brief int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// \brief int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  /// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// \brief void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// \brief void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// \brief void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// \brief void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// \brief void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
669 
670 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
671 /// region.
672 class CleanupTy final : public EHScopeStack::Cleanup {
673   PrePostActionTy *Action;
674 
675 public:
676   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
677   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
678     if (!CGF.HaveInsertPoint())
679       return;
680     Action->Exit(CGF);
681   }
682 };
683 
684 } // anonymous namespace
685 
686 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
687   CodeGenFunction::RunCleanupsScope Scope(CGF);
688   if (PrePostAction) {
689     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
690     Callback(CodeGen, CGF, *PrePostAction);
691   } else {
692     PrePostActionTy Action;
693     Callback(CodeGen, CGF, Action);
694   }
695 }
696 
697 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
698   return CGF.EmitLoadOfPointerLValue(
699       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
700       getThreadIDVariable()->getType()->castAs<PointerType>());
701 }
702 
703 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
704   if (!CGF.HaveInsertPoint())
705     return;
706   // 1.2.2 OpenMP Language Terminology
707   // Structured block - An executable statement with a single entry at the
708   // top and a single exit at the bottom.
709   // The point of exit cannot be a branch out of the structured block.
710   // longjmp() and throw() must not violate the entry/exit criteria.
711   CGF.EHStack.pushTerminate();
712   CodeGen(CGF);
713   CGF.EHStack.popTerminate();
714 }
715 
716 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
717     CodeGenFunction &CGF) {
718   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
719                             getThreadIDVariable()->getType(),
720                             AlignmentSource::Decl);
721 }
722 
/// Sets up the LLVM types shared by all runtime calls and loads the offload
/// entries metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  // ident_t: four 32-bit integer fields followed by an i8* 'psource' field.
  // The trailing nullptr terminates the list of element types.
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  // kmp_critical_name is modelled as an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
733 
734 void CGOpenMPRuntime::clear() {
735   InternalVars.clear();
736 }
737 
/// Emits an internal, always-inline helper function that evaluates
/// \a CombinerInitializer with the variables \a In and \a Out remapped to the
/// objects pointed to by the helper's two pointer parameters.
///
/// \param CGM Module the helper is created in.
/// \param Ty Type of the reduction items; both parameters are restrict
/// pointers to this type.
/// \param CombinerInitializer Expression emitted as the helper's body; its
/// result is ignored.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the helper's name: ".omp_combiner." when true,
/// ".omp_initializer." otherwise.
/// \returns The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note that the 'out' parameter is pushed first, so it comes first in the
  // emitted signature.
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  // The private copy of 'In' is the object the 'in' pointer parameter points
  // to.
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  // Likewise, 'Out' is redirected to the pointee of the 'out' parameter.
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Emit the user expression with the remapped variables; its value is
  // discarded.
  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
781 
782 void CGOpenMPRuntime::emitUserDefinedReduction(
783     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
784   if (UDRMap.count(D) > 0)
785     return;
786   auto &C = CGM.getContext();
787   if (!In || !Out) {
788     In = &C.Idents.get("omp_in");
789     Out = &C.Idents.get("omp_out");
790   }
791   llvm::Function *Combiner = emitCombinerOrInitializer(
792       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
793       cast<VarDecl>(D->lookup(Out).front()),
794       /*IsCombiner=*/true);
795   llvm::Function *Initializer = nullptr;
796   if (auto *Init = D->getInitializer()) {
797     if (!Priv || !Orig) {
798       Priv = &C.Idents.get("omp_priv");
799       Orig = &C.Idents.get("omp_orig");
800     }
801     Initializer = emitCombinerOrInitializer(
802         CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
803         cast<VarDecl>(D->lookup(Priv).front()),
804         /*IsCombiner=*/false);
805   }
806   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
807   if (CGF) {
808     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
809     Decls.second.push_back(D);
810   }
811 }
812 
813 std::pair<llvm::Function *, llvm::Function *>
814 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
815   auto I = UDRMap.find(D);
816   if (I != UDRMap.end())
817     return I->second;
818   emitUserDefinedReduction(/*CGF=*/nullptr, D);
819   return UDRMap.lookup(D);
820 }
821 
822 // Layout information for ident_t.
823 static CharUnits getIdentAlign(CodeGenModule &CGM) {
824   return CGM.getPointerAlign();
825 }
826 static CharUnits getIdentSize(CodeGenModule &CGM) {
827   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
828   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
829 }
830 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
831   // All the fields except the last are i32, so this works beautifully.
832   return unsigned(Field) * CharUnits::fromQuantity(4);
833 }
834 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
835                                    IdentFieldIndex Field,
836                                    const llvm::Twine &Name = "") {
837   auto Offset = getOffsetOfIdentField(Field);
838   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
839 }
840 
841 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
842     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
843     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
844   assert(ThreadIDVar->getType()->isPointerType() &&
845          "thread id variable must be of type kmp_int32 *");
846   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
847   CodeGenFunction CGF(CGM, true);
848   bool HasCancel = false;
849   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
850     HasCancel = OPD->hasCancel();
851   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
852     HasCancel = OPSD->hasCancel();
853   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
854     HasCancel = OPFD->hasCancel();
855   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
856                                     HasCancel);
857   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
858   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
859 }
860 
/// Generates the outlined function for the captured statement associated with
/// the task-based directive \a D.
///
/// \param D Directive whose associated captured statement is outlined.
/// \param ThreadIDVar Thread id variable; must NOT have pointer type.
/// \param PartIDVar Variable handed to the untied task action.
/// \param TaskTVar Pointer variable whose pointee is passed as the task
/// argument of the __kmpc_omp_task call emitted by the untied code generator.
/// \param InnermostKind Directive kind forwarded to the region info.
/// \param CodeGen Code generator for the region body; the untied task action
/// is attached to it.
/// \param Tied Whether the task is tied.
/// \param NumberOfParts Written only for untied tasks, with the number of
/// parts reported by the action; left untouched otherwise.
/// \returns The generated outlined function.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generator handed to the untied task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task) for the task loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  // Only OMPTaskDirective is queried for a cancel region; for any other
  // directive the flag is false.
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported back for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
894 
895 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
896   CharUnits Align = getIdentAlign(CGM);
897   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
898   if (!Entry) {
899     if (!DefaultOpenMPPSource) {
900       // Initialize default location for psource field of ident_t structure of
901       // all ident_t objects. Format is ";file;function;line;column;;".
902       // Taken from
903       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
904       DefaultOpenMPPSource =
905           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
906       DefaultOpenMPPSource =
907           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
908     }
909     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
910         CGM.getModule(), IdentTy, /*isConstant*/ true,
911         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
912     DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
913     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
914 
915     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
916     llvm::Constant *Values[] = {Zero,
917                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
918                                 Zero, Zero, DefaultOpenMPPSource};
919     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
920     DefaultOpenMPLocation->setInitializer(Init);
921     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
922   }
923   return Address(Entry, Align);
924 }
925 
/// Returns a pointer to an ident_t describing \a Loc, suitable for passing to
/// a runtime call.
///
/// OMP_IDENT_KMPC is always OR'ed into \a Flags. When no debug info is being
/// generated or \a Loc is invalid, the shared default location global for the
/// flags is returned. Otherwise a per-function ident_t temporary is used (and
/// created on first use) and its psource field is updated to a
/// ";file;function;line;column;;" string for \a Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the ident_t temporary already recorded for this function, if any.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary with a copy of the default location. The copy
    // is emitted at the alloca insertion point; the guard restores the
    // builder's previous insertion point when this scope ends.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  // The location strings are cached, keyed by the raw encoding of Loc.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function component stays empty when the current declaration is not
    // a FunctionDecl.
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
983 
/// Returns the OpenMP thread id value to use in the current function.
///
/// The lookup order is: a value already cached for the function, then the
/// thread id variable of the enclosing OpenMP region (if it has one), and
/// finally a call to __kmpc_global_thread_num emitted at the alloca insertion
/// point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
      // If value loaded in entry block, cache it and use it everywhere in
      // function.
      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
        Elem.second.ThreadID = ThreadID;
      }
      return ThreadID;
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  // The call is emitted at the alloca insertion point; the guard restores the
  // builder's previous insertion point on return.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  ThreadID =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                          emitUpdateLocation(CGF, Loc));
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  Elem.second.ThreadID = ThreadID;
  return ThreadID;
}
1026 
1027 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1028   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1029   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1030     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1031   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1032     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1033       UDRMap.erase(D);
1034     }
1035     FunctionUDRMap.erase(CGF.CurFn);
1036   }
1037 }
1038 
1039 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1040   if (!IdentTy) {
1041   }
1042   return llvm::PointerType::getUnqual(IdentTy);
1043 }
1044 
1045 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1046   if (!Kmpc_MicroTy) {
1047     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1048     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1049                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1050     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1051   }
1052   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1053 }
1054 
1055 llvm::Constant *
1056 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1057   llvm::Constant *RTLFn = nullptr;
1058   switch (static_cast<OpenMPRTLFunction>(Function)) {
1059   case OMPRTL__kmpc_fork_call: {
1060     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1061     // microtask, ...);
1062     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1063                                 getKmpc_MicroPointerTy()};
1064     llvm::FunctionType *FnTy =
1065         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1066     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1067     break;
1068   }
1069   case OMPRTL__kmpc_global_thread_num: {
1070     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1071     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1072     llvm::FunctionType *FnTy =
1073         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1074     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1075     break;
1076   }
1077   case OMPRTL__kmpc_threadprivate_cached: {
1078     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1079     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1080     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1081                                 CGM.VoidPtrTy, CGM.SizeTy,
1082                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1083     llvm::FunctionType *FnTy =
1084         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1085     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1086     break;
1087   }
1088   case OMPRTL__kmpc_critical: {
1089     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1090     // kmp_critical_name *crit);
1091     llvm::Type *TypeParams[] = {
1092         getIdentTyPointerTy(), CGM.Int32Ty,
1093         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1094     llvm::FunctionType *FnTy =
1095         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1096     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1097     break;
1098   }
1099   case OMPRTL__kmpc_critical_with_hint: {
1100     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1101     // kmp_critical_name *crit, uintptr_t hint);
1102     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1103                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1104                                 CGM.IntPtrTy};
1105     llvm::FunctionType *FnTy =
1106         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1107     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1108     break;
1109   }
1110   case OMPRTL__kmpc_threadprivate_register: {
1111     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1112     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1113     // typedef void *(*kmpc_ctor)(void *);
1114     auto KmpcCtorTy =
1115         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1116                                 /*isVarArg*/ false)->getPointerTo();
1117     // typedef void *(*kmpc_cctor)(void *, void *);
1118     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1119     auto KmpcCopyCtorTy =
1120         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1121                                 /*isVarArg*/ false)->getPointerTo();
1122     // typedef void (*kmpc_dtor)(void *);
1123     auto KmpcDtorTy =
1124         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1125             ->getPointerTo();
1126     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1127                               KmpcCopyCtorTy, KmpcDtorTy};
1128     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1129                                         /*isVarArg*/ false);
1130     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1131     break;
1132   }
1133   case OMPRTL__kmpc_end_critical: {
1134     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1135     // kmp_critical_name *crit);
1136     llvm::Type *TypeParams[] = {
1137         getIdentTyPointerTy(), CGM.Int32Ty,
1138         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1139     llvm::FunctionType *FnTy =
1140         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1141     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1142     break;
1143   }
1144   case OMPRTL__kmpc_cancel_barrier: {
1145     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1146     // global_tid);
1147     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1148     llvm::FunctionType *FnTy =
1149         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1150     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1151     break;
1152   }
1153   case OMPRTL__kmpc_barrier: {
1154     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1155     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1156     llvm::FunctionType *FnTy =
1157         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1158     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1159     break;
1160   }
1161   case OMPRTL__kmpc_for_static_fini: {
1162     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1163     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1164     llvm::FunctionType *FnTy =
1165         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1166     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1167     break;
1168   }
1169   case OMPRTL__kmpc_push_num_threads: {
1170     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1171     // kmp_int32 num_threads)
1172     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1173                                 CGM.Int32Ty};
1174     llvm::FunctionType *FnTy =
1175         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1176     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1177     break;
1178   }
1179   case OMPRTL__kmpc_serialized_parallel: {
1180     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1181     // global_tid);
1182     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1183     llvm::FunctionType *FnTy =
1184         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1185     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1186     break;
1187   }
1188   case OMPRTL__kmpc_end_serialized_parallel: {
1189     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1190     // global_tid);
1191     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1192     llvm::FunctionType *FnTy =
1193         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1194     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1195     break;
1196   }
1197   case OMPRTL__kmpc_flush: {
1198     // Build void __kmpc_flush(ident_t *loc);
1199     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1200     llvm::FunctionType *FnTy =
1201         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1202     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1203     break;
1204   }
1205   case OMPRTL__kmpc_master: {
1206     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1207     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1208     llvm::FunctionType *FnTy =
1209         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1210     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1211     break;
1212   }
1213   case OMPRTL__kmpc_end_master: {
1214     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1215     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1216     llvm::FunctionType *FnTy =
1217         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1218     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1219     break;
1220   }
1221   case OMPRTL__kmpc_omp_taskyield: {
1222     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1223     // int end_part);
1224     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1225     llvm::FunctionType *FnTy =
1226         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1227     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1228     break;
1229   }
1230   case OMPRTL__kmpc_single: {
1231     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1232     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1233     llvm::FunctionType *FnTy =
1234         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1235     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1236     break;
1237   }
1238   case OMPRTL__kmpc_end_single: {
1239     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1240     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1241     llvm::FunctionType *FnTy =
1242         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1243     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1244     break;
1245   }
1246   case OMPRTL__kmpc_omp_task_alloc: {
1247     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1248     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1249     // kmp_routine_entry_t *task_entry);
1250     assert(KmpRoutineEntryPtrTy != nullptr &&
1251            "Type kmp_routine_entry_t must be created.");
1252     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1253                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1254     // Return void * and then cast to particular kmp_task_t type.
1255     llvm::FunctionType *FnTy =
1256         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1257     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1258     break;
1259   }
1260   case OMPRTL__kmpc_omp_task: {
1261     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1262     // *new_task);
1263     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1264                                 CGM.VoidPtrTy};
1265     llvm::FunctionType *FnTy =
1266         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1267     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1268     break;
1269   }
1270   case OMPRTL__kmpc_copyprivate: {
1271     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1272     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1273     // kmp_int32 didit);
1274     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1275     auto *CpyFnTy =
1276         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1277     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1278                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1279                                 CGM.Int32Ty};
1280     llvm::FunctionType *FnTy =
1281         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1282     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1283     break;
1284   }
1285   case OMPRTL__kmpc_reduce: {
1286     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1287     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1288     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1289     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1290     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1291                                                /*isVarArg=*/false);
1292     llvm::Type *TypeParams[] = {
1293         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1294         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1295         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1296     llvm::FunctionType *FnTy =
1297         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1298     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1299     break;
1300   }
1301   case OMPRTL__kmpc_reduce_nowait: {
1302     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1303     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1304     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1305     // *lck);
1306     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1307     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1308                                                /*isVarArg=*/false);
1309     llvm::Type *TypeParams[] = {
1310         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1311         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1312         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1313     llvm::FunctionType *FnTy =
1314         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1315     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1316     break;
1317   }
1318   case OMPRTL__kmpc_end_reduce: {
1319     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1320     // kmp_critical_name *lck);
1321     llvm::Type *TypeParams[] = {
1322         getIdentTyPointerTy(), CGM.Int32Ty,
1323         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1324     llvm::FunctionType *FnTy =
1325         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1326     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1327     break;
1328   }
1329   case OMPRTL__kmpc_end_reduce_nowait: {
1330     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1331     // kmp_critical_name *lck);
1332     llvm::Type *TypeParams[] = {
1333         getIdentTyPointerTy(), CGM.Int32Ty,
1334         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1335     llvm::FunctionType *FnTy =
1336         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1337     RTLFn =
1338         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1339     break;
1340   }
1341   case OMPRTL__kmpc_omp_task_begin_if0: {
1342     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1343     // *new_task);
1344     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1345                                 CGM.VoidPtrTy};
1346     llvm::FunctionType *FnTy =
1347         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1348     RTLFn =
1349         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1350     break;
1351   }
1352   case OMPRTL__kmpc_omp_task_complete_if0: {
1353     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1354     // *new_task);
1355     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1356                                 CGM.VoidPtrTy};
1357     llvm::FunctionType *FnTy =
1358         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1359     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1360                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1361     break;
1362   }
1363   case OMPRTL__kmpc_ordered: {
1364     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1365     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1366     llvm::FunctionType *FnTy =
1367         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1368     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1369     break;
1370   }
1371   case OMPRTL__kmpc_end_ordered: {
1372     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1373     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1374     llvm::FunctionType *FnTy =
1375         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1376     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1377     break;
1378   }
1379   case OMPRTL__kmpc_omp_taskwait: {
1380     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1381     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1382     llvm::FunctionType *FnTy =
1383         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1384     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1385     break;
1386   }
1387   case OMPRTL__kmpc_taskgroup: {
1388     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1389     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1390     llvm::FunctionType *FnTy =
1391         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1392     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1393     break;
1394   }
1395   case OMPRTL__kmpc_end_taskgroup: {
1396     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1397     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1398     llvm::FunctionType *FnTy =
1399         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1400     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1401     break;
1402   }
1403   case OMPRTL__kmpc_push_proc_bind: {
1404     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1405     // int proc_bind)
1406     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1407     llvm::FunctionType *FnTy =
1408         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1409     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1410     break;
1411   }
1412   case OMPRTL__kmpc_omp_task_with_deps: {
1413     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1414     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1415     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1416     llvm::Type *TypeParams[] = {
1417         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1418         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1419     llvm::FunctionType *FnTy =
1420         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1421     RTLFn =
1422         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1423     break;
1424   }
1425   case OMPRTL__kmpc_omp_wait_deps: {
1426     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1427     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1428     // kmp_depend_info_t *noalias_dep_list);
1429     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1430                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1431                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1432     llvm::FunctionType *FnTy =
1433         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1434     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1435     break;
1436   }
1437   case OMPRTL__kmpc_cancellationpoint: {
1438     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1439     // global_tid, kmp_int32 cncl_kind)
1440     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1441     llvm::FunctionType *FnTy =
1442         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1443     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1444     break;
1445   }
1446   case OMPRTL__kmpc_cancel: {
1447     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1448     // kmp_int32 cncl_kind)
1449     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1450     llvm::FunctionType *FnTy =
1451         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1452     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1453     break;
1454   }
1455   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
1458     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1459         CGM.Int32Ty};
1460     llvm::FunctionType *FnTy =
1461         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1462     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1463     break;
1464   }
1465   case OMPRTL__kmpc_fork_teams: {
1466     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1467     // microtask, ...);
1468     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1469                                 getKmpc_MicroPointerTy()};
1470     llvm::FunctionType *FnTy =
1471         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1472     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1473     break;
1474   }
1475   case OMPRTL__kmpc_taskloop: {
1476     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1477     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1478     // sched, kmp_uint64 grainsize, void *task_dup);
1479     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1480                                 CGM.IntTy,
1481                                 CGM.VoidPtrTy,
1482                                 CGM.IntTy,
1483                                 CGM.Int64Ty->getPointerTo(),
1484                                 CGM.Int64Ty->getPointerTo(),
1485                                 CGM.Int64Ty,
1486                                 CGM.IntTy,
1487                                 CGM.IntTy,
1488                                 CGM.Int64Ty,
1489                                 CGM.VoidPtrTy};
1490     llvm::FunctionType *FnTy =
1491         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1492     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1493     break;
1494   }
1495   case OMPRTL__kmpc_doacross_init: {
1496     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1497     // num_dims, struct kmp_dim *dims);
1498     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1499                                 CGM.Int32Ty,
1500                                 CGM.Int32Ty,
1501                                 CGM.VoidPtrTy};
1502     llvm::FunctionType *FnTy =
1503         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1504     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1505     break;
1506   }
1507   case OMPRTL__kmpc_doacross_fini: {
1508     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1509     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1510     llvm::FunctionType *FnTy =
1511         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1512     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1513     break;
1514   }
1515   case OMPRTL__kmpc_doacross_post: {
1516     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1517     // *vec);
1518     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1519                                 CGM.Int64Ty->getPointerTo()};
1520     llvm::FunctionType *FnTy =
1521         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1522     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1523     break;
1524   }
1525   case OMPRTL__kmpc_doacross_wait: {
1526     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1527     // *vec);
1528     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1529                                 CGM.Int64Ty->getPointerTo()};
1530     llvm::FunctionType *FnTy =
1531         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1532     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1533     break;
1534   }
1535   case OMPRTL__tgt_target: {
1536     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1537     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1538     // *arg_types);
1539     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1540                                 CGM.VoidPtrTy,
1541                                 CGM.Int32Ty,
1542                                 CGM.VoidPtrPtrTy,
1543                                 CGM.VoidPtrPtrTy,
1544                                 CGM.SizeTy->getPointerTo(),
1545                                 CGM.Int32Ty->getPointerTo()};
1546     llvm::FunctionType *FnTy =
1547         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1549     break;
1550   }
1551   case OMPRTL__tgt_target_teams: {
1552     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1553     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1554     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1555     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1556                                 CGM.VoidPtrTy,
1557                                 CGM.Int32Ty,
1558                                 CGM.VoidPtrPtrTy,
1559                                 CGM.VoidPtrPtrTy,
1560                                 CGM.SizeTy->getPointerTo(),
1561                                 CGM.Int32Ty->getPointerTo(),
1562                                 CGM.Int32Ty,
1563                                 CGM.Int32Ty};
1564     llvm::FunctionType *FnTy =
1565         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1567     break;
1568   }
1569   case OMPRTL__tgt_register_lib: {
1570     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1571     QualType ParamTy =
1572         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1573     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1574     llvm::FunctionType *FnTy =
1575         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1576     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1577     break;
1578   }
1579   case OMPRTL__tgt_unregister_lib: {
1580     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1581     QualType ParamTy =
1582         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1583     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1584     llvm::FunctionType *FnTy =
1585         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1586     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1587     break;
1588   }
1589   case OMPRTL__tgt_target_data_begin: {
1590     // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
1591     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1592     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1593                                 CGM.Int32Ty,
1594                                 CGM.VoidPtrPtrTy,
1595                                 CGM.VoidPtrPtrTy,
1596                                 CGM.SizeTy->getPointerTo(),
1597                                 CGM.Int32Ty->getPointerTo()};
1598     llvm::FunctionType *FnTy =
1599         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1600     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
1601     break;
1602   }
1603   case OMPRTL__tgt_target_data_end: {
1604     // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
1605     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1606     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1607                                 CGM.Int32Ty,
1608                                 CGM.VoidPtrPtrTy,
1609                                 CGM.VoidPtrPtrTy,
1610                                 CGM.SizeTy->getPointerTo(),
1611                                 CGM.Int32Ty->getPointerTo()};
1612     llvm::FunctionType *FnTy =
1613         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1614     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
1615     break;
1616   }
1617   case OMPRTL__tgt_target_data_update: {
1618     // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
1619     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1620     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1621                                 CGM.Int32Ty,
1622                                 CGM.VoidPtrPtrTy,
1623                                 CGM.VoidPtrPtrTy,
1624                                 CGM.SizeTy->getPointerTo(),
1625                                 CGM.Int32Ty->getPointerTo()};
1626     llvm::FunctionType *FnTy =
1627         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1628     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
1629     break;
1630   }
1631   }
1632   assert(RTLFn && "Unable to find OpenMP runtime function");
1633   return RTLFn;
1634 }
1635 
1636 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1637                                                              bool IVSigned) {
1638   assert((IVSize == 32 || IVSize == 64) &&
1639          "IV size is not compatible with the omp runtime");
1640   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1641                                        : "__kmpc_for_static_init_4u")
1642                            : (IVSigned ? "__kmpc_for_static_init_8"
1643                                        : "__kmpc_for_static_init_8u");
1644   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1646   llvm::Type *TypeParams[] = {
1647     getIdentTyPointerTy(),                     // loc
1648     CGM.Int32Ty,                               // tid
1649     CGM.Int32Ty,                               // schedtype
1650     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1651     PtrTy,                                     // p_lower
1652     PtrTy,                                     // p_upper
1653     PtrTy,                                     // p_stride
1654     ITy,                                       // incr
1655     ITy                                        // chunk
1656   };
1657   llvm::FunctionType *FnTy =
1658       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1659   return CGM.CreateRuntimeFunction(FnTy, Name);
1660 }
1661 
1662 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1663                                                             bool IVSigned) {
1664   assert((IVSize == 32 || IVSize == 64) &&
1665          "IV size is not compatible with the omp runtime");
1666   auto Name =
1667       IVSize == 32
1668           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1669           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1670   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1671   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1672                                CGM.Int32Ty,           // tid
1673                                CGM.Int32Ty,           // schedtype
1674                                ITy,                   // lower
1675                                ITy,                   // upper
1676                                ITy,                   // stride
1677                                ITy                    // chunk
1678   };
1679   llvm::FunctionType *FnTy =
1680       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1681   return CGM.CreateRuntimeFunction(FnTy, Name);
1682 }
1683 
1684 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1685                                                             bool IVSigned) {
1686   assert((IVSize == 32 || IVSize == 64) &&
1687          "IV size is not compatible with the omp runtime");
1688   auto Name =
1689       IVSize == 32
1690           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1691           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1692   llvm::Type *TypeParams[] = {
1693       getIdentTyPointerTy(), // loc
1694       CGM.Int32Ty,           // tid
1695   };
1696   llvm::FunctionType *FnTy =
1697       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1698   return CGM.CreateRuntimeFunction(FnTy, Name);
1699 }
1700 
1701 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1702                                                             bool IVSigned) {
1703   assert((IVSize == 32 || IVSize == 64) &&
1704          "IV size is not compatible with the omp runtime");
1705   auto Name =
1706       IVSize == 32
1707           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1708           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1709   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1710   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1711   llvm::Type *TypeParams[] = {
1712     getIdentTyPointerTy(),                     // loc
1713     CGM.Int32Ty,                               // tid
1714     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1715     PtrTy,                                     // p_lower
1716     PtrTy,                                     // p_upper
1717     PtrTy                                      // p_stride
1718   };
1719   llvm::FunctionType *FnTy =
1720       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1721   return CGM.CreateRuntimeFunction(FnTy, Name);
1722 }
1723 
1724 llvm::Constant *
1725 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1726   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1727          !CGM.getContext().getTargetInfo().isTLSSupported());
1728   // Lookup the entry, lazily creating it if necessary.
1729   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1730                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1731 }
1732 
1733 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734                                                 const VarDecl *VD,
1735                                                 Address VDAddr,
1736                                                 SourceLocation Loc) {
1737   if (CGM.getLangOpts().OpenMPUseTLS &&
1738       CGM.getContext().getTargetInfo().isTLSSupported())
1739     return VDAddr;
1740 
1741   auto VarTy = VDAddr.getElementType();
1742   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744                                                        CGM.Int8PtrTy),
1745                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746                          getOrCreateThreadPrivateCache(VD)};
1747   return Address(CGF.EmitRuntimeCall(
1748       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1749                  VDAddr.getAlignment());
1750 }
1751 
1752 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1753     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1754     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1755   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1756   // library.
1757   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1758   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1759                       OMPLoc);
1760   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1761   // to register constructor/destructor for variable.
1762   llvm::Value *Args[] = {OMPLoc,
1763                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1764                                                        CGM.VoidPtrTy),
1765                          Ctor, CopyCtor, Dtor};
1766   CGF.EmitRuntimeCall(
1767       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1768 }
1769 
1770 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1771     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1772     bool PerformInit, CodeGenFunction *CGF) {
1773   if (CGM.getLangOpts().OpenMPUseTLS &&
1774       CGM.getContext().getTargetInfo().isTLSSupported())
1775     return nullptr;
1776 
1777   VD = VD->getDefinition(CGM.getContext());
1778   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1779     ThreadPrivateWithDefinition.insert(VD);
1780     QualType ASTTy = VD->getType();
1781 
1782     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1783     auto Init = VD->getAnyInitializer();
1784     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1785       // Generate function that re-emits the declaration's initializer into the
1786       // threadprivate copy of the variable VD
1787       CodeGenFunction CtorCGF(CGM);
1788       FunctionArgList Args;
1789       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1790                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1791       Args.push_back(&Dst);
1792 
1793       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1794           CGM.getContext().VoidPtrTy, Args);
1795       auto FTy = CGM.getTypes().GetFunctionType(FI);
1796       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1797           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1798       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1799                             Args, SourceLocation());
1800       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1801           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1802           CGM.getContext().VoidPtrTy, Dst.getLocation());
1803       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1804       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1805                                              CtorCGF.ConvertTypeForMem(ASTTy));
1806       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1807                                /*IsInitializer=*/true);
1808       ArgVal = CtorCGF.EmitLoadOfScalar(
1809           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1810           CGM.getContext().VoidPtrTy, Dst.getLocation());
1811       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1812       CtorCGF.FinishFunction();
1813       Ctor = Fn;
1814     }
1815     if (VD->getType().isDestructedType() != QualType::DK_none) {
1816       // Generate function that emits destructor call for the threadprivate copy
1817       // of the variable VD
1818       CodeGenFunction DtorCGF(CGM);
1819       FunctionArgList Args;
1820       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1821                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1822       Args.push_back(&Dst);
1823 
1824       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1825           CGM.getContext().VoidTy, Args);
1826       auto FTy = CGM.getTypes().GetFunctionType(FI);
1827       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1828           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1829       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1830       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1831                             SourceLocation());
1832       // Create a scope with an artificial location for the body of this function.
1833       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1834       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1835           DtorCGF.GetAddrOfLocalVar(&Dst),
1836           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1837       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1838                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1839                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1840       DtorCGF.FinishFunction();
1841       Dtor = Fn;
1842     }
1843     // Do not emit init function if it is not required.
1844     if (!Ctor && !Dtor)
1845       return nullptr;
1846 
1847     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1848     auto CopyCtorTy =
1849         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1850                                 /*isVarArg=*/false)->getPointerTo();
1851     // Copying constructor for the threadprivate variable.
1852     // Must be NULL - reserved by runtime, but currently it requires that this
1853     // parameter is always NULL. Otherwise it fires assertion.
1854     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1855     if (Ctor == nullptr) {
1856       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1857                                             /*isVarArg=*/false)->getPointerTo();
1858       Ctor = llvm::Constant::getNullValue(CtorTy);
1859     }
1860     if (Dtor == nullptr) {
1861       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1862                                             /*isVarArg=*/false)->getPointerTo();
1863       Dtor = llvm::Constant::getNullValue(DtorTy);
1864     }
1865     if (!CGF) {
1866       auto InitFunctionTy =
1867           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1868       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1869           InitFunctionTy, ".__omp_threadprivate_init_.",
1870           CGM.getTypes().arrangeNullaryFunction());
1871       CodeGenFunction InitCGF(CGM);
1872       FunctionArgList ArgList;
1873       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1874                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1875                             Loc);
1876       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1877       InitCGF.FinishFunction();
1878       return InitFunction;
1879     }
1880     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1881   }
1882   return nullptr;
1883 }
1884 
1885 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1886 /// function. Here is the logic:
1887 /// if (Cond) {
1888 ///   ThenGen();
1889 /// } else {
1890 ///   ElseGen();
1891 /// }
1892 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1893                             const RegionCodeGenTy &ThenGen,
1894                             const RegionCodeGenTy &ElseGen) {
1895   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1896 
1897   // If the condition constant folds and can be elided, try to avoid emitting
1898   // the condition and the dead arm of the if/else.
1899   bool CondConstant;
1900   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1901     if (CondConstant)
1902       ThenGen(CGF);
1903     else
1904       ElseGen(CGF);
1905     return;
1906   }
1907 
1908   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1909   // emit the conditional branch.
1910   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1911   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1912   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1913   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1914 
1915   // Emit the 'then' code.
1916   CGF.EmitBlock(ThenBlock);
1917   ThenGen(CGF);
1918   CGF.EmitBranch(ContBlock);
1919   // Emit the 'else' code if present.
1920   // There is no need to emit line number for unconditional branch.
1921   (void)ApplyDebugLocation::CreateEmpty(CGF);
1922   CGF.EmitBlock(ElseBlock);
1923   ElseGen(CGF);
1924   // There is no need to emit line number for unconditional branch.
1925   (void)ApplyDebugLocation::CreateEmpty(CGF);
1926   CGF.EmitBranch(ContBlock);
1927   // Emit the continuation block for code after the if.
1928   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1929 }
1930 
1931 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1932                                        llvm::Value *OutlinedFn,
1933                                        ArrayRef<llvm::Value *> CapturedVars,
1934                                        const Expr *IfCond) {
1935   if (!CGF.HaveInsertPoint())
1936     return;
1937   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1938   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1939                                                      PrePostActionTy &) {
1940     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1941     auto &RT = CGF.CGM.getOpenMPRuntime();
1942     llvm::Value *Args[] = {
1943         RTLoc,
1944         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1945         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1946     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1947     RealArgs.append(std::begin(Args), std::end(Args));
1948     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1949 
1950     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1951     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1952   };
1953   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1954                                                           PrePostActionTy &) {
1955     auto &RT = CGF.CGM.getOpenMPRuntime();
1956     auto ThreadID = RT.getThreadID(CGF, Loc);
1957     // Build calls:
1958     // __kmpc_serialized_parallel(&Loc, GTid);
1959     llvm::Value *Args[] = {RTLoc, ThreadID};
1960     CGF.EmitRuntimeCall(
1961         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1962 
1963     // OutlinedFn(&GTid, &zero, CapturedStruct);
1964     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1965     Address ZeroAddr =
1966         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1967                              /*Name*/ ".zero.addr");
1968     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1969     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1970     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1971     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1972     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1973     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1974 
1975     // __kmpc_end_serialized_parallel(&Loc, GTid);
1976     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1977     CGF.EmitRuntimeCall(
1978         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1979         EndArgs);
1980   };
1981   if (IfCond)
1982     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1983   else {
1984     RegionCodeGenTy ThenRCG(ThenGen);
1985     ThenRCG(CGF);
1986   }
1987 }
1988 
1989 // If we're inside an (outlined) parallel region, use the region info's
1990 // thread-ID variable (it is passed in a first argument of the outlined function
1991 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1992 // regular serial code region, get thread ID by calling kmp_int32
1993 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1994 // return the address of that temp.
1995 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1996                                              SourceLocation Loc) {
1997   if (auto *OMPRegionInfo =
1998           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1999     if (OMPRegionInfo->getThreadIDVariable())
2000       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2001 
2002   auto ThreadID = getThreadID(CGF, Loc);
2003   auto Int32Ty =
2004       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2005   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2006   CGF.EmitStoreOfScalar(ThreadID,
2007                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2008 
2009   return ThreadIDTemp;
2010 }
2011 
2012 llvm::Constant *
2013 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2014                                              const llvm::Twine &Name) {
2015   SmallString<256> Buffer;
2016   llvm::raw_svector_ostream Out(Buffer);
2017   Out << Name;
2018   auto RuntimeName = Out.str();
2019   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2020   if (Elem.second) {
2021     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2022            "OMP internal variable has different type than requested");
2023     return &*Elem.second;
2024   }
2025 
2026   return Elem.second = new llvm::GlobalVariable(
2027              CGM.getModule(), Ty, /*IsConstant*/ false,
2028              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2029              Elem.first());
2030 }
2031 
2032 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2033   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2034   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2035 }
2036 
2037 namespace {
2038 /// Common pre(post)-action for different OpenMP constructs.
2039 class CommonActionTy final : public PrePostActionTy {
2040   llvm::Value *EnterCallee;
2041   ArrayRef<llvm::Value *> EnterArgs;
2042   llvm::Value *ExitCallee;
2043   ArrayRef<llvm::Value *> ExitArgs;
2044   bool Conditional;
2045   llvm::BasicBlock *ContBlock = nullptr;
2046 
2047 public:
2048   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2049                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2050                  bool Conditional = false)
2051       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2052         ExitArgs(ExitArgs), Conditional(Conditional) {}
2053   void Enter(CodeGenFunction &CGF) override {
2054     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2055     if (Conditional) {
2056       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2057       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2058       ContBlock = CGF.createBasicBlock("omp_if.end");
2059       // Generate the branch (If-stmt)
2060       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2061       CGF.EmitBlock(ThenBlock);
2062     }
2063   }
2064   void Done(CodeGenFunction &CGF) {
2065     // Emit the rest of blocks/branches
2066     CGF.EmitBranch(ContBlock);
2067     CGF.EmitBlock(ContBlock, true);
2068   }
2069   void Exit(CodeGenFunction &CGF) override {
2070     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2071   }
2072 };
2073 } // anonymous namespace
2074 
2075 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2076                                          StringRef CriticalName,
2077                                          const RegionCodeGenTy &CriticalOpGen,
2078                                          SourceLocation Loc, const Expr *Hint) {
2079   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2080   // CriticalOpGen();
2081   // __kmpc_end_critical(ident_t *, gtid, Lock);
2082   // Prepare arguments and build a call to __kmpc_critical
2083   if (!CGF.HaveInsertPoint())
2084     return;
2085   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2086                          getCriticalRegionLock(CriticalName)};
2087   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2088                                                 std::end(Args));
2089   if (Hint) {
2090     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2091         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2092   }
2093   CommonActionTy Action(
2094       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2095                                  : OMPRTL__kmpc_critical),
2096       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2097   CriticalOpGen.setAction(Action);
2098   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2099 }
2100 
2101 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2102                                        const RegionCodeGenTy &MasterOpGen,
2103                                        SourceLocation Loc) {
2104   if (!CGF.HaveInsertPoint())
2105     return;
2106   // if(__kmpc_master(ident_t *, gtid)) {
2107   //   MasterOpGen();
2108   //   __kmpc_end_master(ident_t *, gtid);
2109   // }
2110   // Prepare arguments and build a call to __kmpc_master
2111   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2112   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2113                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2114                         /*Conditional=*/true);
2115   MasterOpGen.setAction(Action);
2116   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2117   Action.Done(CGF);
2118 }
2119 
2120 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2121                                         SourceLocation Loc) {
2122   if (!CGF.HaveInsertPoint())
2123     return;
2124   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2125   llvm::Value *Args[] = {
2126       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2127       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2128   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2129   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2130     Region->emitUntiedSwitch(CGF);
2131 }
2132 
2133 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2134                                           const RegionCodeGenTy &TaskgroupOpGen,
2135                                           SourceLocation Loc) {
2136   if (!CGF.HaveInsertPoint())
2137     return;
2138   // __kmpc_taskgroup(ident_t *, gtid);
2139   // TaskgroupOpGen();
2140   // __kmpc_end_taskgroup(ident_t *, gtid);
2141   // Prepare arguments and build a call to __kmpc_taskgroup
2142   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2143   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2144                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2145                         Args);
2146   TaskgroupOpGen.setAction(Action);
2147   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2148 }
2149 
2150 /// Given an array of pointers to variables, project the address of a
2151 /// given variable.
2152 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2153                                       unsigned Index, const VarDecl *Var) {
2154   // Pull out the pointer to the variable.
2155   Address PtrAddr =
2156       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2157   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2158 
2159   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2160   Addr = CGF.Builder.CreateElementBitCast(
2161       Addr, CGF.ConvertTypeForMem(Var->getType()));
2162   return Addr;
2163 }
2164 
2165 static llvm::Value *emitCopyprivateCopyFunction(
2166     CodeGenModule &CGM, llvm::Type *ArgsType,
2167     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2168     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2169   auto &C = CGM.getContext();
2170   // void copy_func(void *LHSArg, void *RHSArg);
2171   FunctionArgList Args;
2172   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2173                            C.VoidPtrTy);
2174   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2175                            C.VoidPtrTy);
2176   Args.push_back(&LHSArg);
2177   Args.push_back(&RHSArg);
2178   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2179   auto *Fn = llvm::Function::Create(
2180       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2181       ".omp.copyprivate.copy_func", &CGM.getModule());
2182   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2183   CodeGenFunction CGF(CGM);
2184   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2185   // Dest = (void*[n])(LHSArg);
2186   // Src = (void*[n])(RHSArg);
2187   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2188       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2189       ArgsType), CGF.getPointerAlign());
2190   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2191       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2192       ArgsType), CGF.getPointerAlign());
2193   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2194   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2195   // ...
2196   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2197   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2198     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2199     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2200 
2201     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2202     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2203 
2204     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2205     QualType Type = VD->getType();
2206     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2207   }
2208   CGF.FinishFunction();
2209   return Fn;
2210 }
2211 
2212 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2213                                        const RegionCodeGenTy &SingleOpGen,
2214                                        SourceLocation Loc,
2215                                        ArrayRef<const Expr *> CopyprivateVars,
2216                                        ArrayRef<const Expr *> SrcExprs,
2217                                        ArrayRef<const Expr *> DstExprs,
2218                                        ArrayRef<const Expr *> AssignmentOps) {
2219   if (!CGF.HaveInsertPoint())
2220     return;
2221   assert(CopyprivateVars.size() == SrcExprs.size() &&
2222          CopyprivateVars.size() == DstExprs.size() &&
2223          CopyprivateVars.size() == AssignmentOps.size());
2224   auto &C = CGM.getContext();
2225   // int32 did_it = 0;
2226   // if(__kmpc_single(ident_t *, gtid)) {
2227   //   SingleOpGen();
2228   //   __kmpc_end_single(ident_t *, gtid);
2229   //   did_it = 1;
2230   // }
2231   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2232   // <copy_func>, did_it);
2233 
2234   Address DidIt = Address::invalid();
2235   if (!CopyprivateVars.empty()) {
2236     // int32 did_it = 0;
2237     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2238     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2239     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2240   }
2241   // Prepare arguments and build a call to __kmpc_single
2242   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2243   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2244                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2245                         /*Conditional=*/true);
2246   SingleOpGen.setAction(Action);
2247   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2248   if (DidIt.isValid()) {
2249     // did_it = 1;
2250     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2251   }
2252   Action.Done(CGF);
2253   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2254   // <copy_func>, did_it);
2255   if (DidIt.isValid()) {
2256     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2257     auto CopyprivateArrayTy =
2258         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2259                                /*IndexTypeQuals=*/0);
2260     // Create a list of all private variables for copyprivate.
2261     Address CopyprivateList =
2262         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2263     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2264       Address Elem = CGF.Builder.CreateConstArrayGEP(
2265           CopyprivateList, I, CGF.getPointerSize());
2266       CGF.Builder.CreateStore(
2267           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2268               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2269           Elem);
2270     }
2271     // Build function that copies private values from single region to all other
2272     // threads in the corresponding parallel region.
2273     auto *CpyFn = emitCopyprivateCopyFunction(
2274         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2275         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2276     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2277     Address CL =
2278       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2279                                                       CGF.VoidPtrTy);
2280     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2281     llvm::Value *Args[] = {
2282         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2283         getThreadID(CGF, Loc),        // i32 <gtid>
2284         BufSize,                      // size_t <buf_size>
2285         CL.getPointer(),              // void *<copyprivate list>
2286         CpyFn,                        // void (*) (void *, void *) <copy_func>
2287         DidItVal                      // i32 did_it
2288     };
2289     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2290   }
2291 }
2292 
2293 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2294                                         const RegionCodeGenTy &OrderedOpGen,
2295                                         SourceLocation Loc, bool IsThreads) {
2296   if (!CGF.HaveInsertPoint())
2297     return;
2298   // __kmpc_ordered(ident_t *, gtid);
2299   // OrderedOpGen();
2300   // __kmpc_end_ordered(ident_t *, gtid);
2301   // Prepare arguments and build a call to __kmpc_ordered
2302   if (IsThreads) {
2303     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2304     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2305                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2306                           Args);
2307     OrderedOpGen.setAction(Action);
2308     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2309     return;
2310   }
2311   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2312 }
2313 
2314 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2315                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2316                                       bool ForceSimpleCall) {
2317   if (!CGF.HaveInsertPoint())
2318     return;
2319   // Build call __kmpc_cancel_barrier(loc, thread_id);
2320   // Build call __kmpc_barrier(loc, thread_id);
2321   unsigned Flags;
2322   if (Kind == OMPD_for)
2323     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2324   else if (Kind == OMPD_sections)
2325     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2326   else if (Kind == OMPD_single)
2327     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2328   else if (Kind == OMPD_barrier)
2329     Flags = OMP_IDENT_BARRIER_EXPL;
2330   else
2331     Flags = OMP_IDENT_BARRIER_IMPL;
2332   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2333   // thread_id);
2334   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2335                          getThreadID(CGF, Loc)};
2336   if (auto *OMPRegionInfo =
2337           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2338     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2339       auto *Result = CGF.EmitRuntimeCall(
2340           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2341       if (EmitChecks) {
2342         // if (__kmpc_cancel_barrier()) {
2343         //   exit from construct;
2344         // }
2345         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2346         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2347         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2348         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2349         CGF.EmitBlock(ExitBB);
2350         //   exit from construct;
2351         auto CancelDestination =
2352             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2353         CGF.EmitBranchThroughCleanup(CancelDestination);
2354         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2355       }
2356       return;
2357     }
2358   }
2359   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2360 }
2361 
2362 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2363 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2364                                           bool Chunked, bool Ordered) {
2365   switch (ScheduleKind) {
2366   case OMPC_SCHEDULE_static:
2367     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2368                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2369   case OMPC_SCHEDULE_dynamic:
2370     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2371   case OMPC_SCHEDULE_guided:
2372     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2373   case OMPC_SCHEDULE_runtime:
2374     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2375   case OMPC_SCHEDULE_auto:
2376     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2377   case OMPC_SCHEDULE_unknown:
2378     assert(!Chunked && "chunk was specified but schedule kind not known");
2379     return Ordered ? OMP_ord_static : OMP_sch_static;
2380   }
2381   llvm_unreachable("Unexpected runtime schedule");
2382 }
2383 
2384 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2385 static OpenMPSchedType
2386 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2387   // only static is allowed for dist_schedule
2388   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2389 }
2390 
2391 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2392                                          bool Chunked) const {
2393   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2394   return Schedule == OMP_sch_static;
2395 }
2396 
2397 bool CGOpenMPRuntime::isStaticNonchunked(
2398     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2399   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2400   return Schedule == OMP_dist_sch_static;
2401 }
2402 
2403 
2404 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2405   auto Schedule =
2406       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2407   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2408   return Schedule != OMP_sch_static;
2409 }
2410 
2411 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2412                                   OpenMPScheduleClauseModifier M1,
2413                                   OpenMPScheduleClauseModifier M2) {
2414   int Modifier = 0;
2415   switch (M1) {
2416   case OMPC_SCHEDULE_MODIFIER_monotonic:
2417     Modifier = OMP_sch_modifier_monotonic;
2418     break;
2419   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2420     Modifier = OMP_sch_modifier_nonmonotonic;
2421     break;
2422   case OMPC_SCHEDULE_MODIFIER_simd:
2423     if (Schedule == OMP_sch_static_chunked)
2424       Schedule = OMP_sch_static_balanced_chunked;
2425     break;
2426   case OMPC_SCHEDULE_MODIFIER_last:
2427   case OMPC_SCHEDULE_MODIFIER_unknown:
2428     break;
2429   }
2430   switch (M2) {
2431   case OMPC_SCHEDULE_MODIFIER_monotonic:
2432     Modifier = OMP_sch_modifier_monotonic;
2433     break;
2434   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2435     Modifier = OMP_sch_modifier_nonmonotonic;
2436     break;
2437   case OMPC_SCHEDULE_MODIFIER_simd:
2438     if (Schedule == OMP_sch_static_chunked)
2439       Schedule = OMP_sch_static_balanced_chunked;
2440     break;
2441   case OMPC_SCHEDULE_MODIFIER_last:
2442   case OMPC_SCHEDULE_MODIFIER_unknown:
2443     break;
2444   }
2445   return Schedule | Modifier;
2446 }
2447 
2448 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2449                                           SourceLocation Loc,
2450                                           const OpenMPScheduleTy &ScheduleKind,
2451                                           unsigned IVSize, bool IVSigned,
2452                                           bool Ordered, llvm::Value *UB,
2453                                           llvm::Value *Chunk) {
2454   if (!CGF.HaveInsertPoint())
2455     return;
2456   OpenMPSchedType Schedule =
2457       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2458   assert(Ordered ||
2459          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2460           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2461           Schedule != OMP_sch_static_balanced_chunked));
2462   // Call __kmpc_dispatch_init(
2463   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2464   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2465   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2466 
2467   // If the Chunk was not specified in the clause - use default value 1.
2468   if (Chunk == nullptr)
2469     Chunk = CGF.Builder.getIntN(IVSize, 1);
2470   llvm::Value *Args[] = {
2471       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2472       CGF.Builder.getInt32(addMonoNonMonoModifier(
2473           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2474       CGF.Builder.getIntN(IVSize, 0),                   // Lower
2475       UB,                                               // Upper
2476       CGF.Builder.getIntN(IVSize, 1),                   // Stride
2477       Chunk                                             // Chunk
2478   };
2479   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2480 }
2481 
2482 static void emitForStaticInitCall(
2483     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2484     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2485     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2486     unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2487     Address ST, llvm::Value *Chunk) {
2488   if (!CGF.HaveInsertPoint())
2489      return;
2490 
2491    assert(!Ordered);
2492    assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2493           Schedule == OMP_sch_static_balanced_chunked ||
2494           Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2495           Schedule == OMP_dist_sch_static ||
2496           Schedule == OMP_dist_sch_static_chunked);
2497 
2498    // Call __kmpc_for_static_init(
2499    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2500    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2501    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2502    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2503    if (Chunk == nullptr) {
2504      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2505              Schedule == OMP_dist_sch_static) &&
2506             "expected static non-chunked schedule");
2507      // If the Chunk was not specified in the clause - use default value 1.
2508        Chunk = CGF.Builder.getIntN(IVSize, 1);
2509    } else {
2510      assert((Schedule == OMP_sch_static_chunked ||
2511              Schedule == OMP_sch_static_balanced_chunked ||
2512              Schedule == OMP_ord_static_chunked ||
2513              Schedule == OMP_dist_sch_static_chunked) &&
2514             "expected static chunked schedule");
2515    }
2516    llvm::Value *Args[] = {
2517        UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2518                                      Schedule, M1, M2)), // Schedule type
2519        IL.getPointer(),                                  // &isLastIter
2520        LB.getPointer(),                                  // &LB
2521        UB.getPointer(),                                  // &UB
2522        ST.getPointer(),                                  // &Stride
2523        CGF.Builder.getIntN(IVSize, 1),                   // Incr
2524        Chunk                                             // Chunk
2525    };
2526    CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2527 }
2528 
2529 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2530                                         SourceLocation Loc,
2531                                         const OpenMPScheduleTy &ScheduleKind,
2532                                         unsigned IVSize, bool IVSigned,
2533                                         bool Ordered, Address IL, Address LB,
2534                                         Address UB, Address ST,
2535                                         llvm::Value *Chunk) {
2536   OpenMPSchedType ScheduleNum =
2537       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2538   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2539   auto *ThreadId = getThreadID(CGF, Loc);
2540   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2541   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2542                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
2543                         Ordered, IL, LB, UB, ST, Chunk);
2544 }
2545 
2546 void CGOpenMPRuntime::emitDistributeStaticInit(
2547     CodeGenFunction &CGF, SourceLocation Loc,
2548     OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
2549     bool Ordered, Address IL, Address LB, Address UB, Address ST,
2550     llvm::Value *Chunk) {
2551   OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2552   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2553   auto *ThreadId = getThreadID(CGF, Loc);
2554   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2555   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2556                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2557                         OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
2558                         UB, ST, Chunk);
2559 }
2560 
2561 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2562                                           SourceLocation Loc) {
2563   if (!CGF.HaveInsertPoint())
2564     return;
2565   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2566   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2567   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2568                       Args);
2569 }
2570 
2571 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2572                                                  SourceLocation Loc,
2573                                                  unsigned IVSize,
2574                                                  bool IVSigned) {
2575   if (!CGF.HaveInsertPoint())
2576     return;
2577   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2578   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2579   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2580 }
2581 
2582 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2583                                           SourceLocation Loc, unsigned IVSize,
2584                                           bool IVSigned, Address IL,
2585                                           Address LB, Address UB,
2586                                           Address ST) {
2587   // Call __kmpc_dispatch_next(
2588   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2589   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2590   //          kmp_int[32|64] *p_stride);
2591   llvm::Value *Args[] = {
2592       emitUpdateLocation(CGF, Loc),
2593       getThreadID(CGF, Loc),
2594       IL.getPointer(), // &isLastIter
2595       LB.getPointer(), // &Lower
2596       UB.getPointer(), // &Upper
2597       ST.getPointer()  // &Stride
2598   };
2599   llvm::Value *Call =
2600       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2601   return CGF.EmitScalarConversion(
2602       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2603       CGF.getContext().BoolTy, Loc);
2604 }
2605 
2606 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2607                                            llvm::Value *NumThreads,
2608                                            SourceLocation Loc) {
2609   if (!CGF.HaveInsertPoint())
2610     return;
2611   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2612   llvm::Value *Args[] = {
2613       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2614       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2615   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2616                       Args);
2617 }
2618 
2619 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2620                                          OpenMPProcBindClauseKind ProcBind,
2621                                          SourceLocation Loc) {
2622   if (!CGF.HaveInsertPoint())
2623     return;
2624   // Constants for proc bind value accepted by the runtime.
2625   enum ProcBindTy {
2626     ProcBindFalse = 0,
2627     ProcBindTrue,
2628     ProcBindMaster,
2629     ProcBindClose,
2630     ProcBindSpread,
2631     ProcBindIntel,
2632     ProcBindDefault
2633   } RuntimeProcBind;
2634   switch (ProcBind) {
2635   case OMPC_PROC_BIND_master:
2636     RuntimeProcBind = ProcBindMaster;
2637     break;
2638   case OMPC_PROC_BIND_close:
2639     RuntimeProcBind = ProcBindClose;
2640     break;
2641   case OMPC_PROC_BIND_spread:
2642     RuntimeProcBind = ProcBindSpread;
2643     break;
2644   case OMPC_PROC_BIND_unknown:
2645     llvm_unreachable("Unsupported proc_bind value.");
2646   }
2647   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2648   llvm::Value *Args[] = {
2649       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2650       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2651   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2652 }
2653 
2654 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2655                                 SourceLocation Loc) {
2656   if (!CGF.HaveInsertPoint())
2657     return;
2658   // Build call void __kmpc_flush(ident_t *loc)
2659   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2660                       emitUpdateLocation(CGF, Loc));
2661 }
2662 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators are used as positional offsets from field_begin() of the
/// kmp_task_t record, so their order has to match the order in which
/// createKmpTaskTRecordDecl() adds the fields (shareds, routine, part_id,
/// data1, data2 and, for taskloops only, lb, ub, st, liter).
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables (the 'data1'
  /// field of type kmp_cmplrdata_t).
  Data1,
  /// Task priority (the 'data2' field of type kmp_cmplrdata_t).
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
};
} // anonymous namespace
2686 
2687 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2688   // FIXME: Add other entries type when they become supported.
2689   return OffloadEntriesTargetRegion.empty();
2690 }
2691 
2692 /// \brief Initialize target region entry.
2693 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2694     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2695                                     StringRef ParentName, unsigned LineNum,
2696                                     unsigned Order) {
2697   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2698                                              "only required for the device "
2699                                              "code generation.");
2700   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2701       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2702   ++OffloadingEntriesNum;
2703 }
2704 
2705 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2706     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2707                                   StringRef ParentName, unsigned LineNum,
2708                                   llvm::Constant *Addr, llvm::Constant *ID) {
2709   // If we are emitting code for a target, the entry is already initialized,
2710   // only has to be registered.
2711   if (CGM.getLangOpts().OpenMPIsDevice) {
2712     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2713            "Entry must exist.");
2714     auto &Entry =
2715         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2716     assert(Entry.isValid() && "Entry not initialized!");
2717     Entry.setAddress(Addr);
2718     Entry.setID(ID);
2719     return;
2720   } else {
2721     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2722     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2723   }
2724 }
2725 
2726 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2727     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2728     unsigned LineNum) const {
2729   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2730   if (PerDevice == OffloadEntriesTargetRegion.end())
2731     return false;
2732   auto PerFile = PerDevice->second.find(FileID);
2733   if (PerFile == PerDevice->second.end())
2734     return false;
2735   auto PerParentName = PerFile->second.find(ParentName);
2736   if (PerParentName == PerFile->second.end())
2737     return false;
2738   auto PerLine = PerParentName->second.find(LineNum);
2739   if (PerLine == PerParentName->second.end())
2740     return false;
2741   // Fail if this entry is already registered.
2742   if (PerLine->second.getAddress() || PerLine->second.getID())
2743     return false;
2744   return true;
2745 }
2746 
2747 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2748     const OffloadTargetRegionEntryInfoActTy &Action) {
2749   // Scan all target region entries and perform the provided action.
2750   for (auto &D : OffloadEntriesTargetRegion)
2751     for (auto &F : D.second)
2752       for (auto &P : F.second)
2753         for (auto &L : P.second)
2754           Action(D.first, F.first, P.first(), L.first, L.second);
2755 }
2756 
2757 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2758 /// \a Codegen. This is used to emit the two functions that register and
2759 /// unregister the descriptor of the current compilation unit.
2760 static llvm::Function *
2761 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2762                                          const RegionCodeGenTy &Codegen) {
2763   auto &C = CGM.getContext();
2764   FunctionArgList Args;
2765   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2766                              /*Id=*/nullptr, C.VoidPtrTy);
2767   Args.push_back(&DummyPtr);
2768 
2769   CodeGenFunction CGF(CGM);
2770   GlobalDecl();
2771   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2772   auto FTy = CGM.getTypes().GetFunctionType(FI);
2773   auto *Fn =
2774       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2775   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2776   Codegen(CGF);
2777   CGF.FinishFunction();
2778   return Fn;
2779 }
2780 
/// Build the offloading binary descriptor of this compilation unit and the
/// function that registers it with the runtime.
///
/// The descriptor references one device image per entry of
/// LangOpts.OMPTargetTriples plus the begin/end symbols of the host entries
/// section. Two functions are emitted: one calling the unregister-lib runtime
/// entry and one calling the register-lib runtime entry, the latter also
/// registering the former as a global destructor.
///
/// \returns The registration function, or nullptr when compiling for the
/// device or when there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {

  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // Get list of devices we care about
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  // Both are declared here without an initializer, with external linkage.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // Create all device images
  llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));

  // One __tgt_device_image initializer per target triple. The image start/end
  // symbols are external declarations whose names are suffixed with the
  // triple; every image shares the same host entries begin/end symbols.
  for (unsigned i = 0; i < Devices.size(); ++i) {
    StringRef T = Devices[i].getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(T));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));

    llvm::Constant *Dev =
        llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
                                  HostEntriesBegin, HostEntriesEnd, nullptr);
    DeviceImagesEntires.push_back(Dev);
  }

  // Create device images global array.
  llvm::ArrayType *DeviceImagesInitTy =
      llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
  llvm::Constant *DeviceImagesInit =
      llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);

  llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
      M, DeviceImagesInitTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
      ".omp_offloading.device_images");
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  // (a {0, 0} GEP index pair, used below to address the first element of the
  // device images array).
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  // Fields: number of devices, pointer to the first device image, and the
  // host entries begin/end symbols.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
      BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
                                           Index),
      HostEntriesBegin, HostEntriesEnd, nullptr);

  auto *Desc = new llvm::GlobalVariable(
      M, BinaryDescriptorTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
      ".omp_offloading.descriptor");

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Create a variable to drive the registration and unregistration of the
  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy);

  // The unregistration function is created first because the body of the
  // registration function refers to it when registering the global dtor.
  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  return RegFn;
}
2887 
2888 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2889                                          llvm::Constant *Addr, uint64_t Size) {
2890   StringRef Name = Addr->getName();
2891   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2892       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2893   llvm::LLVMContext &C = CGM.getModule().getContext();
2894   llvm::Module &M = CGM.getModule();
2895 
2896   // Make sure the address has the right type.
2897   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2898 
2899   // Create constant string with the name.
2900   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2901 
2902   llvm::GlobalVariable *Str =
2903       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2904                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2905                                ".omp_offloading.entry_name");
2906   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2907   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2908 
2909   // Create the entry struct.
2910   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2911       TgtOffloadEntryType, AddrPtr, StrPtr,
2912       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2913   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2914       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2915       EntryInit, ".omp_offloading.entry");
2916 
2917   // The entry has to be created in the section the linker expects it to be.
2918   Entry->setSection(".omp_offloading.entries");
2919   // We can't have any padding between symbols, so we need to have 1-byte
2920   // alignment.
2921   Entry->setAlignment(1);
2922 }
2923 
2924 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2925   // Emit the offloading entries and metadata so that the device codegen side
2926   // can
2927   // easily figure out what to emit. The produced metadata looks like this:
2928   //
2929   // !omp_offload.info = !{!1, ...}
2930   //
2931   // Right now we only generate metadata for function that contain target
2932   // regions.
2933 
2934   // If we do not have entries, we dont need to do anything.
2935   if (OffloadEntriesInfoManager.empty())
2936     return;
2937 
2938   llvm::Module &M = CGM.getModule();
2939   llvm::LLVMContext &C = M.getContext();
2940   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2941       OrderedEntries(OffloadEntriesInfoManager.size());
2942 
2943   // Create the offloading info metadata node.
2944   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2945 
2946   // Auxiliar methods to create metadata values and strings.
2947   auto getMDInt = [&](unsigned v) {
2948     return llvm::ConstantAsMetadata::get(
2949         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2950   };
2951 
2952   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2953 
2954   // Create function that emits metadata for each target region entry;
2955   auto &&TargetRegionMetadataEmitter = [&](
2956       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2957       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2958     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2959     // Generate metadata for target regions. Each entry of this metadata
2960     // contains:
2961     // - Entry 0 -> Kind of this type of metadata (0).
2962     // - Entry 1 -> Device ID of the file where the entry was identified.
2963     // - Entry 2 -> File ID of the file where the entry was identified.
2964     // - Entry 3 -> Mangled name of the function where the entry was identified.
2965     // - Entry 4 -> Line in the file where the entry was identified.
2966     // - Entry 5 -> Order the entry was created.
2967     // The first element of the metadata node is the kind.
2968     Ops.push_back(getMDInt(E.getKind()));
2969     Ops.push_back(getMDInt(DeviceID));
2970     Ops.push_back(getMDInt(FileID));
2971     Ops.push_back(getMDString(ParentName));
2972     Ops.push_back(getMDInt(Line));
2973     Ops.push_back(getMDInt(E.getOrder()));
2974 
2975     // Save this entry in the right position of the ordered entries array.
2976     OrderedEntries[E.getOrder()] = &E;
2977 
2978     // Add metadata to the named metadata node.
2979     MD->addOperand(llvm::MDNode::get(C, Ops));
2980   };
2981 
2982   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2983       TargetRegionMetadataEmitter);
2984 
2985   for (auto *E : OrderedEntries) {
2986     assert(E && "All ordered entries must exist!");
2987     if (auto *CE =
2988             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2989                 E)) {
2990       assert(CE->getID() && CE->getAddress() &&
2991              "Entry ID and Addr are invalid!");
2992       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
2993     } else
2994       llvm_unreachable("Unsupported entry kind.");
2995   }
2996 }
2997 
2998 /// \brief Loads all the offload entries information from the host IR
2999 /// metadata.
3000 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3001   // If we are in target mode, load the metadata from the host IR. This code has
3002   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3003 
3004   if (!CGM.getLangOpts().OpenMPIsDevice)
3005     return;
3006 
3007   if (CGM.getLangOpts().OMPHostIRFile.empty())
3008     return;
3009 
3010   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3011   if (Buf.getError())
3012     return;
3013 
3014   llvm::LLVMContext C;
3015   auto ME = expectedToErrorOrAndEmitErrors(
3016       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3017 
3018   if (ME.getError())
3019     return;
3020 
3021   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3022   if (!MD)
3023     return;
3024 
3025   for (auto I : MD->operands()) {
3026     llvm::MDNode *MN = cast<llvm::MDNode>(I);
3027 
3028     auto getMDInt = [&](unsigned Idx) {
3029       llvm::ConstantAsMetadata *V =
3030           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3031       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3032     };
3033 
3034     auto getMDString = [&](unsigned Idx) {
3035       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3036       return V->getString();
3037     };
3038 
3039     switch (getMDInt(0)) {
3040     default:
3041       llvm_unreachable("Unexpected metadata!");
3042       break;
3043     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3044         OFFLOAD_ENTRY_INFO_TARGET_REGION:
3045       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3046           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3047           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3048           /*Order=*/getMDInt(5));
3049       break;
3050     }
3051   }
3052 }
3053 
3054 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3055   if (!KmpRoutineEntryPtrTy) {
3056     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3057     auto &C = CGM.getContext();
3058     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3059     FunctionProtoType::ExtProtoInfo EPI;
3060     KmpRoutineEntryPtrQTy = C.getPointerType(
3061         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3062     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3063   }
3064 }
3065 
3066 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3067                                        QualType FieldTy) {
3068   auto *Field = FieldDecl::Create(
3069       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3070       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3071       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3072   Field->setAccess(AS_public);
3073   DC->addDecl(Field);
3074   return Field;
3075 }
3076 
3077 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3078 
3079   // Make sure the type of the entry is already created. This is the type we
3080   // have to create:
3081   // struct __tgt_offload_entry{
3082   //   void      *addr;       // Pointer to the offload entry info.
3083   //                          // (function or global)
3084   //   char      *name;       // Name of the function or global.
3085   //   size_t     size;       // Size of the entry info (0 if it a function).
3086   // };
3087   if (TgtOffloadEntryQTy.isNull()) {
3088     ASTContext &C = CGM.getContext();
3089     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3090     RD->startDefinition();
3091     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3092     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3093     addFieldToRecordDecl(C, RD, C.getSizeType());
3094     RD->completeDefinition();
3095     TgtOffloadEntryQTy = C.getRecordType(RD);
3096   }
3097   return TgtOffloadEntryQTy;
3098 }
3099 
3100 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3101   // These are the types we need to build:
3102   // struct __tgt_device_image{
3103   // void   *ImageStart;       // Pointer to the target code start.
3104   // void   *ImageEnd;         // Pointer to the target code end.
3105   // // We also add the host entries to the device image, as it may be useful
3106   // // for the target runtime to have access to that information.
3107   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3108   //                                       // the entries.
3109   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3110   //                                       // entries (non inclusive).
3111   // };
3112   if (TgtDeviceImageQTy.isNull()) {
3113     ASTContext &C = CGM.getContext();
3114     auto *RD = C.buildImplicitRecord("__tgt_device_image");
3115     RD->startDefinition();
3116     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3117     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3118     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3119     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3120     RD->completeDefinition();
3121     TgtDeviceImageQTy = C.getRecordType(RD);
3122   }
3123   return TgtDeviceImageQTy;
3124 }
3125 
3126 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3127   // struct __tgt_bin_desc{
3128   //   int32_t              NumDevices;      // Number of devices supported.
3129   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
3130   //                                         // (one per device).
3131   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3132   //                                         // entries.
3133   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3134   //                                         // entries (non inclusive).
3135   // };
3136   if (TgtBinaryDescriptorQTy.isNull()) {
3137     ASTContext &C = CGM.getContext();
3138     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3139     RD->startDefinition();
3140     addFieldToRecordDecl(
3141         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3142     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3143     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3144     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3145     RD->completeDefinition();
3146     TgtBinaryDescriptorQTy = C.getRecordType(RD);
3147   }
3148   return TgtBinaryDescriptorQTy;
3149 }
3150 
3151 namespace {
3152 struct PrivateHelpersTy {
3153   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3154                    const VarDecl *PrivateElemInit)
3155       : Original(Original), PrivateCopy(PrivateCopy),
3156         PrivateElemInit(PrivateElemInit) {}
3157   const VarDecl *Original;
3158   const VarDecl *PrivateCopy;
3159   const VarDecl *PrivateElemInit;
3160 };
3161 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3162 } // anonymous namespace
3163 
3164 static RecordDecl *
3165 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3166   if (!Privates.empty()) {
3167     auto &C = CGM.getContext();
3168     // Build struct .kmp_privates_t. {
3169     //         /*  private vars  */
3170     //       };
3171     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3172     RD->startDefinition();
3173     for (auto &&Pair : Privates) {
3174       auto *VD = Pair.second.Original;
3175       auto Type = VD->getType();
3176       Type = Type.getNonReferenceType();
3177       auto *FD = addFieldToRecordDecl(C, RD, Type);
3178       if (VD->hasAttrs()) {
3179         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3180              E(VD->getAttrs().end());
3181              I != E; ++I)
3182           FD->addAttr(*I);
3183       }
3184     }
3185     RD->completeDefinition();
3186     return RD;
3187   }
3188   return nullptr;
3189 }
3190 
3191 static RecordDecl *
3192 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3193                          QualType KmpInt32Ty,
3194                          QualType KmpRoutineEntryPointerQTy) {
3195   auto &C = CGM.getContext();
3196   // Build struct kmp_task_t {
3197   //         void *              shareds;
3198   //         kmp_routine_entry_t routine;
3199   //         kmp_int32           part_id;
3200   //         kmp_cmplrdata_t data1;
3201   //         kmp_cmplrdata_t data2;
3202   // For taskloops additional fields:
3203   //         kmp_uint64          lb;
3204   //         kmp_uint64          ub;
3205   //         kmp_int64           st;
3206   //         kmp_int32           liter;
3207   //       };
3208   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3209   UD->startDefinition();
3210   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3211   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3212   UD->completeDefinition();
3213   QualType KmpCmplrdataTy = C.getRecordType(UD);
3214   auto *RD = C.buildImplicitRecord("kmp_task_t");
3215   RD->startDefinition();
3216   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3217   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3218   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3219   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3220   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3221   if (isOpenMPTaskLoopDirective(Kind)) {
3222     QualType KmpUInt64Ty =
3223         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3224     QualType KmpInt64Ty =
3225         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3226     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3227     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3228     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3229     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3230   }
3231   RD->completeDefinition();
3232   return RD;
3233 }
3234 
3235 static RecordDecl *
3236 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3237                                      ArrayRef<PrivateDataTy> Privates) {
3238   auto &C = CGM.getContext();
3239   // Build struct kmp_task_t_with_privates {
3240   //         kmp_task_t task_data;
3241   //         .kmp_privates_t. privates;
3242   //       };
3243   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3244   RD->startDefinition();
3245   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3246   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3247     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3248   }
3249   RD->completeDefinition();
3250   return RD;
3251 }
3252 
/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->shareds);
///   return 0;
/// }
/// \endcode
///
/// The part id is passed as the address of the part_id field, and the
/// privates argument is a null 'void *' when the task record has no privates
/// field. The function is created with internal linkage and debug info
/// disabled.
///
/// \returns The created '.omp_task_entry.' function.
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Parameters of the proxy: the global thread id and a restrict-qualified
  // pointer to the task record with privates.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the record the second argument points to; Base is its first
  // field, i.e. the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // The part id is passed as a pointer to the field; no load is emitted.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates are the optional second field of the record with privates;
  // pass its address as 'void *', or null if the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

  // Arguments common to tasks and taskloops; the last one is the task record
  // itself, cast to 'void *'.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass the loaded lower bound, upper bound, stride
  // and last-iteration flag, in that order.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    auto StLVal = CGF.EmitLValueForField(Base, *StFI);
    auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
    auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
  }
  // The shareds pointer always comes last.
  CallArgs.push_back(SharedsParam);

  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3359 
3360 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3361                                             SourceLocation Loc,
3362                                             QualType KmpInt32Ty,
3363                                             QualType KmpTaskTWithPrivatesPtrQTy,
3364                                             QualType KmpTaskTWithPrivatesQTy) {
3365   auto &C = CGM.getContext();
3366   FunctionArgList Args;
3367   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3368   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3369                                 /*Id=*/nullptr,
3370                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3371   Args.push_back(&GtidArg);
3372   Args.push_back(&TaskTypeArg);
3373   FunctionType::ExtInfo Info;
3374   auto &DestructorFnInfo =
3375       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3376   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3377   auto *DestructorFn =
3378       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3379                              ".omp_task_destructor.", &CGM.getModule());
3380   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3381                                     DestructorFnInfo);
3382   CodeGenFunction CGF(CGM);
3383   CGF.disableDebugInfo();
3384   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3385                     Args);
3386 
3387   LValue Base = CGF.EmitLoadOfPointerLValue(
3388       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3389       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3390   auto *KmpTaskTWithPrivatesQTyRD =
3391       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3392   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3393   Base = CGF.EmitLValueForField(Base, *FI);
3394   for (auto *Field :
3395        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3396     if (auto DtorKind = Field->getType().isDestructedType()) {
3397       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3398       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3399     }
3400   }
3401   CGF.FinishFunction();
3402   return DestructorFn;
3403 }
3404 
3405 /// \brief Emit a privates mapping function for correct handling of private and
3406 /// firstprivate variables.
3407 /// \code
3408 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3409 /// **noalias priv1,...,  <tyn> **noalias privn) {
3410 ///   *priv1 = &.privates.priv1;
3411 ///   ...;
3412 ///   *privn = &.privates.privn;
3413 /// }
3414 /// \endcode
3415 static llvm::Value *
3416 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3417                                ArrayRef<const Expr *> PrivateVars,
3418                                ArrayRef<const Expr *> FirstprivateVars,
3419                                ArrayRef<const Expr *> LastprivateVars,
3420                                QualType PrivatesQTy,
3421                                ArrayRef<PrivateDataTy> Privates) {
3422   auto &C = CGM.getContext();
3423   FunctionArgList Args;
3424   ImplicitParamDecl TaskPrivatesArg(
3425       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3426       C.getPointerType(PrivatesQTy).withConst().withRestrict());
3427   Args.push_back(&TaskPrivatesArg);
3428   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3429   unsigned Counter = 1;
3430   for (auto *E: PrivateVars) {
3431     Args.push_back(ImplicitParamDecl::Create(
3432         C, /*DC=*/nullptr, Loc,
3433         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3434                             .withConst()
3435                             .withRestrict()));
3436     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3437     PrivateVarsPos[VD] = Counter;
3438     ++Counter;
3439   }
3440   for (auto *E : FirstprivateVars) {
3441     Args.push_back(ImplicitParamDecl::Create(
3442         C, /*DC=*/nullptr, Loc,
3443         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3444                             .withConst()
3445                             .withRestrict()));
3446     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3447     PrivateVarsPos[VD] = Counter;
3448     ++Counter;
3449   }
3450   for (auto *E: LastprivateVars) {
3451     Args.push_back(ImplicitParamDecl::Create(
3452         C, /*DC=*/nullptr, Loc,
3453         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3454                             .withConst()
3455                             .withRestrict()));
3456     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3457     PrivateVarsPos[VD] = Counter;
3458     ++Counter;
3459   }
3460   auto &TaskPrivatesMapFnInfo =
3461       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3462   auto *TaskPrivatesMapTy =
3463       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3464   auto *TaskPrivatesMap = llvm::Function::Create(
3465       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3466       ".omp_task_privates_map.", &CGM.getModule());
3467   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3468                                     TaskPrivatesMapFnInfo);
3469   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3470   CodeGenFunction CGF(CGM);
3471   CGF.disableDebugInfo();
3472   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3473                     TaskPrivatesMapFnInfo, Args);
3474 
3475   // *privi = &.privates.privi;
3476   LValue Base = CGF.EmitLoadOfPointerLValue(
3477       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3478       TaskPrivatesArg.getType()->castAs<PointerType>());
3479   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3480   Counter = 0;
3481   for (auto *Field : PrivatesQTyRD->fields()) {
3482     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3483     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3484     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3485     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3486         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3487     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3488     ++Counter;
3489   }
3490   CGF.FinishFunction();
3491   return TaskPrivatesMap;
3492 }
3493 
3494 static int array_pod_sort_comparator(const PrivateDataTy *P1,
3495                                      const PrivateDataTy *P2) {
3496   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3497 }
3498 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \a Privates in lockstep with the fields of the privates record (the
/// second field of \a KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into its field.
///
/// \param CGF Function into which the initialization code is emitted.
/// \param D Directive whose associated captured statement is used to look up
/// the shared fields of firstprivate originals.
/// \param KmpTaskSharedsPtr Address of the shareds block; only read when
/// \a Data has firstprivate variables.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param KmpTaskTWithPrivatesQTyRD Declaration of that record.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer-to-shareds type used to cast
/// \a KmpTaskSharedsPtr.
/// \param Data Task clause data; only FirstprivateVars is consulted here.
/// \param Privates Private entries, in the same order as the record fields.
/// \param ForDup When true, only initializers that are non-trivial constructor
/// calls are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // The second field of the record holds the privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // SrcBase is only set up (and only used below) for firstprivate copies.
  LValue SrcBase;
  if (!Data.FirstprivateVars.empty()) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
      cast<CapturedStmt>(*D.getAssociatedStmt()));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    // In the task-dup case skip everything except non-trivial constructor
    // calls.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (auto *Elem = Pair.second.PrivateElemInit) {
        // The initializer refers to the original value through Elem: locate
        // the original in the shareds block and rebuild its lvalue with the
        // declared alignment of the original variable.
        auto *OriginalVD = Pair.second.Original;
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
        SharedRefLValue = CGF.MakeAddrLValue(
            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
            SharedRefLValue.getType(), AlignmentSource::Decl);
        QualType Type = OriginalVD->getType();
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Make Elem resolve to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: make Elem resolve to the shared original, then emit the
          // initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        // No element-init helper: the initializer does not need the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance to the next field even when nothing was emitted for this one.
    ++FI;
  }
}
3576 
3577 /// Check if duplication function is required for taskloops.
3578 static bool checkInitIsRequired(CodeGenFunction &CGF,
3579                                 ArrayRef<PrivateDataTy> Privates) {
3580   bool InitRequired = false;
3581   for (auto &&Pair : Privates) {
3582     auto *VD = Pair.second.PrivateCopy;
3583     auto *Init = VD->getAnyInitializer();
3584     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3585                                     !CGF.isTrivialInitializer(Init));
3586   }
3587   return InitRequired;
3588 }
3589 
3590 
3591 /// Emit task_dup function (for initialization of
3592 /// private/firstprivate/lastprivate vars and last_iter flag)
3593 /// \code
3594 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3595 /// lastpriv) {
3596 /// // setup lastprivate flag
3597 ///    task_dst->last = lastpriv;
3598 /// // could be constructor calls here...
3599 /// }
3600 /// \endcode
3601 static llvm::Value *
3602 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3603                     const OMPExecutableDirective &D,
3604                     QualType KmpTaskTWithPrivatesPtrQTy,
3605                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3606                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3607                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3608                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3609   auto &C = CGM.getContext();
3610   FunctionArgList Args;
3611   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
3612                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3613   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
3614                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3615   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
3616                                 /*Id=*/nullptr, C.IntTy);
3617   Args.push_back(&DstArg);
3618   Args.push_back(&SrcArg);
3619   Args.push_back(&LastprivArg);
3620   auto &TaskDupFnInfo =
3621       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3622   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3623   auto *TaskDup =
3624       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
3625                              ".omp_task_dup.", &CGM.getModule());
3626   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
3627   CodeGenFunction CGF(CGM);
3628   CGF.disableDebugInfo();
3629   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
3630 
3631   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3632       CGF.GetAddrOfLocalVar(&DstArg),
3633       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3634   // task_dst->liter = lastpriv;
3635   if (WithLastIter) {
3636     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3637     LValue Base = CGF.EmitLValueForField(
3638         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3639     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3640     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3641         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3642     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3643   }
3644 
3645   // Emit initial values for private copies (if any).
3646   assert(!Privates.empty());
3647   Address KmpTaskSharedsPtr = Address::invalid();
3648   if (!Data.FirstprivateVars.empty()) {
3649     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3650         CGF.GetAddrOfLocalVar(&SrcArg),
3651         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3652     LValue Base = CGF.EmitLValueForField(
3653         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3654     KmpTaskSharedsPtr = Address(
3655         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3656                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3657                                                   KmpTaskTShareds)),
3658                              Loc),
3659         CGF.getNaturalTypeAlignment(SharedsTy));
3660   }
3661   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3662                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3663   CGF.FinishFunction();
3664   return TaskDup;
3665 }
3666 
3667 /// Checks if destructor function is required to be generated.
3668 /// \return true if cleanups are required, false otherwise.
3669 static bool
3670 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3671   bool NeedsCleanup = false;
3672   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3673   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3674   for (auto *FD : PrivateRD->fields()) {
3675     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3676     if (NeedsCleanup)
3677       break;
3678   }
3679   return NeedsCleanup;
3680 }
3681 
/// Emit the code that allocates and fills a task descriptor for directive
/// \a D, without enqueuing it.
///
/// In order, this: collects and sorts the private entries from \a Data, builds
/// the task record types, emits the privates-mapping and proxy entry
/// functions, calls __kmpc_omp_task_alloc, copies \a Shareds into the new task
/// (if \a SharedsTy has fields), initializes the private copies, and stores
/// the destructor thunk and priority when they apply.
///
/// \return The allocated task, the proxy entry, the task pointer cast to the
/// task-with-privates type, the lvalue of the embedded kmp_task_t, its record
/// declaration, and (for taskloop directives that need one) the task
/// duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the declared alignment of the original variable with its
  // (original, private copy, element-init helper) triple.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  // Only firstprivates carry an element-init helper variable.
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The expected type of the mapping function is taken from the parameter at
  // index 3 of the outlined task function.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function of the expected type.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  // NOTE(review): the int part of Data.Priority presumably marks that a
  // priority clause is present - confirm against OMPTaskDataTy.
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final flag is selected at run time when Data.Final carries a value,
  // otherwise it is folded from the int part of Data.Final.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the allocated task as the task-with-privates record; TDBase is the
  // kmp_task_t stored in its first field.
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    // Note that the whole task-with-privates lvalue (Base) is passed here, not
    // the embedded kmp_task_t.
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a duplication function when there are lastprivates or
    // when some private needs a non-trivial constructor call.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // NOTE(review): the union declaration was obtained from the Data1 field but
  // is reused for Data2 - presumably both fields have the same union type.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
3866 
3867 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
3868                                    const OMPExecutableDirective &D,
3869                                    llvm::Value *TaskFunction,
3870                                    QualType SharedsTy, Address Shareds,
3871                                    const Expr *IfCond,
3872                                    const OMPTaskDataTy &Data) {
3873   if (!CGF.HaveInsertPoint())
3874     return;
3875 
3876   TaskResultTy Result =
3877       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
3878   llvm::Value *NewTask = Result.NewTask;
3879   llvm::Value *TaskEntry = Result.TaskEntry;
3880   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
3881   LValue TDBase = Result.TDBase;
3882   RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
3883   auto &C = CGM.getContext();
3884   // Process list of dependences.
3885   Address DependenciesArray = Address::invalid();
3886   unsigned NumDependencies = Data.Dependences.size();
3887   if (NumDependencies) {
3888     // Dependence kind for RTL.
3889     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
3890     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
3891     RecordDecl *KmpDependInfoRD;
3892     QualType FlagsTy =
3893         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
3894     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
3895     if (KmpDependInfoTy.isNull()) {
3896       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
3897       KmpDependInfoRD->startDefinition();
3898       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
3899       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
3900       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
3901       KmpDependInfoRD->completeDefinition();
3902       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
3903     } else
3904       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
3905     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
3906     // Define type kmp_depend_info[<Dependences.size()>];
3907     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
3908         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
3909         ArrayType::Normal, /*IndexTypeQuals=*/0);
3910     // kmp_depend_info[<Dependences.size()>] deps;
3911     DependenciesArray =
3912         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
3913     for (unsigned i = 0; i < NumDependencies; ++i) {
3914       const Expr *E = Data.Dependences[i].second;
3915       auto Addr = CGF.EmitLValue(E);
3916       llvm::Value *Size;
3917       QualType Ty = E->getType();
3918       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3919         LValue UpAddrLVal =
3920             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3921         llvm::Value *UpAddr =
3922             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3923         llvm::Value *LowIntPtr =
3924             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3925         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3926         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3927       } else
3928         Size = CGF.getTypeSize(Ty);
3929       auto Base = CGF.MakeAddrLValue(
3930           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3931           KmpDependInfoTy);
3932       // deps[i].base_addr = &<Dependences[i].second>;
3933       auto BaseAddrLVal = CGF.EmitLValueForField(
3934           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3935       CGF.EmitStoreOfScalar(
3936           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3937           BaseAddrLVal);
3938       // deps[i].len = sizeof(<Dependences[i].second>);
3939       auto LenLVal = CGF.EmitLValueForField(
3940           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3941       CGF.EmitStoreOfScalar(Size, LenLVal);
3942       // deps[i].flags = <Dependences[i].first>;
3943       RTLDependenceKindTy DepKind;
3944       switch (Data.Dependences[i].first) {
3945       case OMPC_DEPEND_in:
3946         DepKind = DepIn;
3947         break;
3948       // Out and InOut dependencies must use the same code.
3949       case OMPC_DEPEND_out:
3950       case OMPC_DEPEND_inout:
3951         DepKind = DepInOut;
3952         break;
3953       case OMPC_DEPEND_source:
3954       case OMPC_DEPEND_sink:
3955       case OMPC_DEPEND_unknown:
3956         llvm_unreachable("Unknown task dependence type");
3957       }
3958       auto FlagsLVal = CGF.EmitLValueForField(
3959           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3960       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3961                             FlagsLVal);
3962     }
3963     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3964         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3965         CGF.VoidPtrTy);
3966   }
3967 
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
3969   // libcall.
3970   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3971   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3972   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3973   // list is not empty
3974   auto *ThreadID = getThreadID(CGF, Loc);
3975   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3976   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3977   llvm::Value *DepTaskArgs[7];
3978   if (NumDependencies) {
3979     DepTaskArgs[0] = UpLoc;
3980     DepTaskArgs[1] = ThreadID;
3981     DepTaskArgs[2] = NewTask;
3982     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3983     DepTaskArgs[4] = DependenciesArray.getPointer();
3984     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3985     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3986   }
3987   auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
3988                         NumDependencies, &TaskArgs,
3989                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
3990     if (!Data.Tied) {
3991       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3992       auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
3993       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
3994     }
3995     if (NumDependencies) {
3996       CGF.EmitRuntimeCall(
3997           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
3998     } else {
3999       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4000                           TaskArgs);
4001     }
4002     // Check if parent region is untied and build return for untied task;
4003     if (auto *Region =
4004             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4005       Region->emitUntiedSwitch(CGF);
4006   };
4007 
4008   llvm::Value *DepWaitTaskArgs[6];
4009   if (NumDependencies) {
4010     DepWaitTaskArgs[0] = UpLoc;
4011     DepWaitTaskArgs[1] = ThreadID;
4012     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4013     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4014     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4015     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4016   }
4017   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4018                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
4019                                                            PrePostActionTy &) {
4020     auto &RT = CGF.CGM.getOpenMPRuntime();
4021     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4022     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4023     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4024     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4025     // is specified.
4026     if (NumDependencies)
4027       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4028                           DepWaitTaskArgs);
4029     // Call proxy_task_entry(gtid, new_task);
4030     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
4031         CodeGenFunction &CGF, PrePostActionTy &Action) {
4032       Action.Enter(CGF);
4033       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4034       CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
4035     };
4036 
4037     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4038     // kmp_task_t *new_task);
4039     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4040     // kmp_task_t *new_task);
4041     RegionCodeGenTy RCG(CodeGen);
4042     CommonActionTy Action(
4043         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4044         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4045     RCG.setAction(Action);
4046     RCG(CGF);
4047   };
4048 
4049   if (IfCond)
4050     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4051   else {
4052     RegionCodeGenTy ThenRCG(ThenCodeGen);
4053     ThenRCG(CGF);
4054   }
4055 }
4056 
4057 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4058                                        const OMPLoopDirective &D,
4059                                        llvm::Value *TaskFunction,
4060                                        QualType SharedsTy, Address Shareds,
4061                                        const Expr *IfCond,
4062                                        const OMPTaskDataTy &Data) {
4063   if (!CGF.HaveInsertPoint())
4064     return;
4065   TaskResultTy Result =
4066       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4067   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
4068   // libcall.
4069   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4070   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4071   // sched, kmp_uint64 grainsize, void *task_dup);
4072   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4073   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4074   llvm::Value *IfVal;
4075   if (IfCond) {
4076     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4077                                       /*isSigned=*/true);
4078   } else
4079     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4080 
4081   LValue LBLVal = CGF.EmitLValueForField(
4082       Result.TDBase,
4083       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4084   auto *LBVar =
4085       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4086   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4087                        /*IsInitializer=*/true);
4088   LValue UBLVal = CGF.EmitLValueForField(
4089       Result.TDBase,
4090       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4091   auto *UBVar =
4092       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4093   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4094                        /*IsInitializer=*/true);
4095   LValue StLVal = CGF.EmitLValueForField(
4096       Result.TDBase,
4097       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4098   auto *StVar =
4099       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4100   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4101                        /*IsInitializer=*/true);
4102   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4103   llvm::Value *TaskArgs[] = {
4104       UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
4105       UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4106       llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
4107       llvm::ConstantInt::getSigned(
4108           CGF.IntTy, Data.Schedule.getPointer()
4109                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4110                          : NoSchedule),
4111       Data.Schedule.getPointer()
4112           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4113                                       /*isSigned=*/false)
4114           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4115       Result.TaskDupFn
4116           ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
4117                                                             CGF.VoidPtrTy)
4118           : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4119   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4120 }
4121 
4122 /// \brief Emit reduction operation for each element of array (required for
4123 /// array sections) LHS op = RHS.
4124 /// \param Type Type of array.
4125 /// \param LHSVar Variable on the left side of the reduction operation
4126 /// (references element of array in original variable).
4127 /// \param RHSVar Variable on the right side of the reduction operation
4128 /// (references element of array in original variable).
4129 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4130 /// RHSVar.
4131 static void EmitOMPAggregateReduction(
4132     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4133     const VarDecl *RHSVar,
4134     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4135                                   const Expr *, const Expr *)> &RedOpGen,
4136     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4137     const Expr *UpExpr = nullptr) {
4138   // Perform element-by-element initialization.
4139   QualType ElementTy;
4140   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4141   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4142 
4143   // Drill down to the base element type on both arrays.
4144   auto ArrayTy = Type->getAsArrayTypeUnsafe();
4145   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4146 
4147   auto RHSBegin = RHSAddr.getPointer();
4148   auto LHSBegin = LHSAddr.getPointer();
4149   // Cast from pointer to array type to pointer to single element.
4150   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4151   // The basic structure here is a while-do loop.
4152   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4153   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4154   auto IsEmpty =
4155       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4156   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4157 
4158   // Enter the loop body, making that address the current address.
4159   auto EntryBB = CGF.Builder.GetInsertBlock();
4160   CGF.EmitBlock(BodyBB);
4161 
4162   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4163 
4164   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4165       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4166   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4167   Address RHSElementCurrent =
4168       Address(RHSElementPHI,
4169               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4170 
4171   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4172       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4173   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4174   Address LHSElementCurrent =
4175       Address(LHSElementPHI,
4176               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4177 
4178   // Emit copy.
4179   CodeGenFunction::OMPPrivateScope Scope(CGF);
4180   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4181   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4182   Scope.Privatize();
4183   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4184   Scope.ForceCleanup();
4185 
4186   // Shift the address forward by one element.
4187   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4188       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4189   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4190       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4191   // Check whether we've reached the end.
4192   auto Done =
4193       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4194   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4195   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4196   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4197 
4198   // Done.
4199   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4200 }
4201 
4202 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4203 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4204 /// UDR combiner function.
4205 static void emitReductionCombiner(CodeGenFunction &CGF,
4206                                   const Expr *ReductionOp) {
4207   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4208     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4209       if (auto *DRE =
4210               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4211         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4212           std::pair<llvm::Function *, llvm::Function *> Reduction =
4213               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4214           RValue Func = RValue::get(Reduction.first);
4215           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4216           CGF.EmitIgnoredExpr(ReductionOp);
4217           return;
4218         }
4219   CGF.EmitIgnoredExpr(ReductionOp);
4220 }
4221 
4222 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
4223                                           llvm::Type *ArgsType,
4224                                           ArrayRef<const Expr *> Privates,
4225                                           ArrayRef<const Expr *> LHSExprs,
4226                                           ArrayRef<const Expr *> RHSExprs,
4227                                           ArrayRef<const Expr *> ReductionOps) {
4228   auto &C = CGM.getContext();
4229 
4230   // void reduction_func(void *LHSArg, void *RHSArg);
4231   FunctionArgList Args;
4232   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4233                            C.VoidPtrTy);
4234   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4235                            C.VoidPtrTy);
4236   Args.push_back(&LHSArg);
4237   Args.push_back(&RHSArg);
4238   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4239   auto *Fn = llvm::Function::Create(
4240       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4241       ".omp.reduction.reduction_func", &CGM.getModule());
4242   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4243   CodeGenFunction CGF(CGM);
4244   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4245 
4246   // Dst = (void*[n])(LHSArg);
4247   // Src = (void*[n])(RHSArg);
4248   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4249       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4250       ArgsType), CGF.getPointerAlign());
4251   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4252       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4253       ArgsType), CGF.getPointerAlign());
4254 
4255   //  ...
4256   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4257   //  ...
4258   CodeGenFunction::OMPPrivateScope Scope(CGF);
4259   auto IPriv = Privates.begin();
4260   unsigned Idx = 0;
4261   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4262     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4263     Scope.addPrivate(RHSVar, [&]() -> Address {
4264       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4265     });
4266     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4267     Scope.addPrivate(LHSVar, [&]() -> Address {
4268       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4269     });
4270     QualType PrivTy = (*IPriv)->getType();
4271     if (PrivTy->isVariablyModifiedType()) {
4272       // Get array size and emit VLA type.
4273       ++Idx;
4274       Address Elem =
4275           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4276       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4277       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4278       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4279       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4280           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4281       CGF.EmitVariablyModifiedType(PrivTy);
4282     }
4283   }
4284   Scope.Privatize();
4285   IPriv = Privates.begin();
4286   auto ILHS = LHSExprs.begin();
4287   auto IRHS = RHSExprs.begin();
4288   for (auto *E : ReductionOps) {
4289     if ((*IPriv)->getType()->isArrayType()) {
4290       // Emit reduction for array section.
4291       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4292       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4293       EmitOMPAggregateReduction(
4294           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4295           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4296             emitReductionCombiner(CGF, E);
4297           });
4298     } else
4299       // Emit reduction for array subscript or single variable.
4300       emitReductionCombiner(CGF, E);
4301     ++IPriv;
4302     ++ILHS;
4303     ++IRHS;
4304   }
4305   Scope.ForceCleanup();
4306   CGF.FinishFunction();
4307   return Fn;
4308 }
4309 
4310 static void emitSingleReductionCombiner(CodeGenFunction &CGF,
4311                                         const Expr *ReductionOp,
4312                                         const Expr *PrivateRef,
4313                                         const DeclRefExpr *LHS,
4314                                         const DeclRefExpr *RHS) {
4315   if (PrivateRef->getType()->isArrayType()) {
4316     // Emit reduction for array section.
4317     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4318     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4319     EmitOMPAggregateReduction(
4320         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4321         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4322           emitReductionCombiner(CGF, ReductionOp);
4323         });
4324   } else
4325     // Emit reduction for array subscript or single variable.
4326     emitReductionCombiner(CGF, ReductionOp);
4327 }
4328 
4329 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4330                                     ArrayRef<const Expr *> Privates,
4331                                     ArrayRef<const Expr *> LHSExprs,
4332                                     ArrayRef<const Expr *> RHSExprs,
4333                                     ArrayRef<const Expr *> ReductionOps,
4334                                     bool WithNowait, bool SimpleReduction) {
4335   if (!CGF.HaveInsertPoint())
4336     return;
4337   // Next code should be emitted for reduction:
4338   //
4339   // static kmp_critical_name lock = { 0 };
4340   //
4341   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4342   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4343   //  ...
4344   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4345   //  *(Type<n>-1*)rhs[<n>-1]);
4346   // }
4347   //
4348   // ...
4349   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4350   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4351   // RedList, reduce_func, &<lock>)) {
4352   // case 1:
4353   //  ...
4354   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4355   //  ...
4356   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4357   // break;
4358   // case 2:
4359   //  ...
4360   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4361   //  ...
4362   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4363   // break;
4364   // default:;
4365   // }
4366   //
4367   // if SimpleReduction is true, only the next code is generated:
4368   //  ...
4369   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4370   //  ...
4371 
4372   auto &C = CGM.getContext();
4373 
4374   if (SimpleReduction) {
4375     CodeGenFunction::RunCleanupsScope Scope(CGF);
4376     auto IPriv = Privates.begin();
4377     auto ILHS = LHSExprs.begin();
4378     auto IRHS = RHSExprs.begin();
4379     for (auto *E : ReductionOps) {
4380       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4381                                   cast<DeclRefExpr>(*IRHS));
4382       ++IPriv;
4383       ++ILHS;
4384       ++IRHS;
4385     }
4386     return;
4387   }
4388 
4389   // 1. Build a list of reduction variables.
4390   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4391   auto Size = RHSExprs.size();
4392   for (auto *E : Privates) {
4393     if (E->getType()->isVariablyModifiedType())
4394       // Reserve place for array size.
4395       ++Size;
4396   }
4397   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4398   QualType ReductionArrayTy =
4399       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
4400                              /*IndexTypeQuals=*/0);
4401   Address ReductionList =
4402       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4403   auto IPriv = Privates.begin();
4404   unsigned Idx = 0;
4405   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4406     Address Elem =
4407       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
4408     CGF.Builder.CreateStore(
4409         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4410             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
4411         Elem);
4412     if ((*IPriv)->getType()->isVariablyModifiedType()) {
4413       // Store array size.
4414       ++Idx;
4415       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
4416                                              CGF.getPointerSize());
4417       llvm::Value *Size = CGF.Builder.CreateIntCast(
4418           CGF.getVLASize(
4419                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
4420               .first,
4421           CGF.SizeTy, /*isSigned=*/false);
4422       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
4423                               Elem);
4424     }
4425   }
4426 
4427   // 2. Emit reduce_func().
4428   auto *ReductionFn = emitReductionFunction(
4429       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
4430       LHSExprs, RHSExprs, ReductionOps);
4431 
4432   // 3. Create static kmp_critical_name lock = { 0 };
4433   auto *Lock = getCriticalRegionLock(".reduction");
4434 
4435   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4436   // RedList, reduce_func, &<lock>);
4437   auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4438   auto *ThreadId = getThreadID(CGF, Loc);
4439   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
4440   auto *RL =
4441     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
4442                                                     CGF.VoidPtrTy);
4443   llvm::Value *Args[] = {
4444       IdentTLoc,                             // ident_t *<loc>
4445       ThreadId,                              // i32 <gtid>
4446       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
4447       ReductionArrayTySize,                  // size_type sizeof(RedList)
4448       RL,                                    // void *RedList
4449       ReductionFn, // void (*) (void *, void *) <reduce_func>
4450       Lock         // kmp_critical_name *&<lock>
4451   };
4452   auto Res = CGF.EmitRuntimeCall(
4453       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
4454                                        : OMPRTL__kmpc_reduce),
4455       Args);
4456 
4457   // 5. Build switch(res)
4458   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
4459   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
4460 
4461   // 6. Build case 1:
4462   //  ...
4463   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4464   //  ...
4465   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4466   // break;
4467   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
4468   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
4469   CGF.EmitBlock(Case1BB);
4470 
4471   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4472   llvm::Value *EndArgs[] = {
4473       IdentTLoc, // ident_t *<loc>
4474       ThreadId,  // i32 <gtid>
4475       Lock       // kmp_critical_name *&<lock>
4476   };
4477   auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4478       CodeGenFunction &CGF, PrePostActionTy &Action) {
4479     auto IPriv = Privates.begin();
4480     auto ILHS = LHSExprs.begin();
4481     auto IRHS = RHSExprs.begin();
4482     for (auto *E : ReductionOps) {
4483       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4484                                   cast<DeclRefExpr>(*IRHS));
4485       ++IPriv;
4486       ++ILHS;
4487       ++IRHS;
4488     }
4489   };
4490   RegionCodeGenTy RCG(CodeGen);
4491   CommonActionTy Action(
4492       nullptr, llvm::None,
4493       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
4494                                        : OMPRTL__kmpc_end_reduce),
4495       EndArgs);
4496   RCG.setAction(Action);
4497   RCG(CGF);
4498 
4499   CGF.EmitBranch(DefaultBB);
4500 
4501   // 7. Build case 2:
4502   //  ...
4503   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4504   //  ...
4505   // break;
4506   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
4507   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
4508   CGF.EmitBlock(Case2BB);
4509 
4510   auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4511       CodeGenFunction &CGF, PrePostActionTy &Action) {
4512     auto ILHS = LHSExprs.begin();
4513     auto IRHS = RHSExprs.begin();
4514     auto IPriv = Privates.begin();
4515     for (auto *E : ReductionOps) {
4516       const Expr *XExpr = nullptr;
4517       const Expr *EExpr = nullptr;
4518       const Expr *UpExpr = nullptr;
4519       BinaryOperatorKind BO = BO_Comma;
4520       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
4521         if (BO->getOpcode() == BO_Assign) {
4522           XExpr = BO->getLHS();
4523           UpExpr = BO->getRHS();
4524         }
4525       }
4526       // Try to emit update expression as a simple atomic.
4527       auto *RHSExpr = UpExpr;
4528       if (RHSExpr) {
4529         // Analyze RHS part of the whole expression.
4530         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
4531                 RHSExpr->IgnoreParenImpCasts())) {
4532           // If this is a conditional operator, analyze its condition for
4533           // min/max reduction operator.
4534           RHSExpr = ACO->getCond();
4535         }
4536         if (auto *BORHS =
4537                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
4538           EExpr = BORHS->getRHS();
4539           BO = BORHS->getOpcode();
4540         }
4541       }
4542       if (XExpr) {
4543         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4544         auto &&AtomicRedGen = [BO, VD, IPriv,
4545                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
4546                                     const Expr *EExpr, const Expr *UpExpr) {
4547           LValue X = CGF.EmitLValue(XExpr);
4548           RValue E;
4549           if (EExpr)
4550             E = CGF.EmitAnyExpr(EExpr);
4551           CGF.EmitOMPAtomicSimpleUpdateExpr(
4552               X, E, BO, /*IsXLHSInRHSPart=*/true,
4553               llvm::AtomicOrdering::Monotonic, Loc,
4554               [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
4555                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4556                 PrivateScope.addPrivate(
4557                     VD, [&CGF, VD, XRValue, Loc]() -> Address {
4558                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
4559                       CGF.emitOMPSimpleStore(
4560                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
4561                           VD->getType().getNonReferenceType(), Loc);
4562                       return LHSTemp;
4563                     });
4564                 (void)PrivateScope.Privatize();
4565                 return CGF.EmitAnyExpr(UpExpr);
4566               });
4567         };
4568         if ((*IPriv)->getType()->isArrayType()) {
4569           // Emit atomic reduction for array section.
4570           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4571           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
4572                                     AtomicRedGen, XExpr, EExpr, UpExpr);
4573         } else
4574           // Emit atomic reduction for array subscript or single variable.
4575           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
4576       } else {
4577         // Emit as a critical region.
4578         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
4579                                      const Expr *, const Expr *) {
4580           auto &RT = CGF.CGM.getOpenMPRuntime();
4581           RT.emitCriticalRegion(
4582               CGF, ".atomic_reduction",
4583               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
4584                 Action.Enter(CGF);
4585                 emitReductionCombiner(CGF, E);
4586               },
4587               Loc);
4588         };
4589         if ((*IPriv)->getType()->isArrayType()) {
4590           auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4591           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4592           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4593                                     CritRedGen);
4594         } else
4595           CritRedGen(CGF, nullptr, nullptr, nullptr);
4596       }
4597       ++ILHS;
4598       ++IRHS;
4599       ++IPriv;
4600     }
4601   };
4602   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
4603   if (!WithNowait) {
4604     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
4605     llvm::Value *EndArgs[] = {
4606         IdentTLoc, // ident_t *<loc>
4607         ThreadId,  // i32 <gtid>
4608         Lock       // kmp_critical_name *&<lock>
4609     };
4610     CommonActionTy Action(nullptr, llvm::None,
4611                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
4612                           EndArgs);
4613     AtomicRCG.setAction(Action);
4614     AtomicRCG(CGF);
4615   } else
4616     AtomicRCG(CGF);
4617 
4618   CGF.EmitBranch(DefaultBB);
4619   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
4620 }
4621 
4622 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4623                                        SourceLocation Loc) {
4624   if (!CGF.HaveInsertPoint())
4625     return;
4626   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4627   // global_tid);
4628   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4629   // Ignore return result until untied tasks are supported.
4630   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4631   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4632     Region->emitUntiedSwitch(CGF);
4633 }
4634 
4635 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4636                                            OpenMPDirectiveKind InnerKind,
4637                                            const RegionCodeGenTy &CodeGen,
4638                                            bool HasCancel) {
4639   if (!CGF.HaveInsertPoint())
4640     return;
4641   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4642   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4643 }
4644 
namespace {
/// Cancellation kind codes. The value produced by getCancellationKind() is
/// passed as the 'cncl_kind' argument of the cancellation runtime calls.
enum RTCancelKind {
  /// Initial value in getCancellationKind(); every path there overwrites it.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
4654 
4655 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4656   RTCancelKind CancelKind = CancelNoreq;
4657   if (CancelRegion == OMPD_parallel)
4658     CancelKind = CancelParallel;
4659   else if (CancelRegion == OMPD_for)
4660     CancelKind = CancelLoop;
4661   else if (CancelRegion == OMPD_sections)
4662     CancelKind = CancelSections;
4663   else {
4664     assert(CancelRegion == OMPD_taskgroup);
4665     CancelKind = CancelTaskgroup;
4666   }
4667   return CancelKind;
4668 }
4669 
4670 void CGOpenMPRuntime::emitCancellationPointCall(
4671     CodeGenFunction &CGF, SourceLocation Loc,
4672     OpenMPDirectiveKind CancelRegion) {
4673   if (!CGF.HaveInsertPoint())
4674     return;
4675   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4676   // global_tid, kmp_int32 cncl_kind);
4677   if (auto *OMPRegionInfo =
4678           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4679     if (OMPRegionInfo->hasCancel()) {
4680       llvm::Value *Args[] = {
4681           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4682           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4683       // Ignore return result until untied tasks are supported.
4684       auto *Result = CGF.EmitRuntimeCall(
4685           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4686       // if (__kmpc_cancellationpoint()) {
4687       //  __kmpc_cancel_barrier();
4688       //   exit from construct;
4689       // }
4690       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4691       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4692       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4693       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4694       CGF.EmitBlock(ExitBB);
4695       // __kmpc_cancel_barrier();
4696       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4697       // exit from construct;
4698       auto CancelDest =
4699           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4700       CGF.EmitBranchThroughCleanup(CancelDest);
4701       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4702     }
4703   }
4704 }
4705 
4706 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4707                                      const Expr *IfCond,
4708                                      OpenMPDirectiveKind CancelRegion) {
4709   if (!CGF.HaveInsertPoint())
4710     return;
4711   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4712   // kmp_int32 cncl_kind);
4713   if (auto *OMPRegionInfo =
4714           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4715     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4716                                                         PrePostActionTy &) {
4717       auto &RT = CGF.CGM.getOpenMPRuntime();
4718       llvm::Value *Args[] = {
4719           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4720           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4721       // Ignore return result until untied tasks are supported.
4722       auto *Result = CGF.EmitRuntimeCall(
4723           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4724       // if (__kmpc_cancel()) {
4725       //  __kmpc_cancel_barrier();
4726       //   exit from construct;
4727       // }
4728       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4729       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4730       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4731       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4732       CGF.EmitBlock(ExitBB);
4733       // __kmpc_cancel_barrier();
4734       RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4735       // exit from construct;
4736       auto CancelDest =
4737           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4738       CGF.EmitBranchThroughCleanup(CancelDest);
4739       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4740     };
4741     if (IfCond)
4742       emitOMPIfClause(CGF, IfCond, ThenGen,
4743                       [](CodeGenFunction &, PrePostActionTy &) {});
4744     else {
4745       RegionCodeGenTy ThenRCG(ThenGen);
4746       ThenRCG(CGF);
4747     }
4748   }
4749 }
4750 
4751 /// \brief Obtain information that uniquely identifies a target entry. This
4752 /// consists of the file and device IDs as well as line number associated with
4753 /// the relevant entry source location.
4754 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4755                                      unsigned &DeviceID, unsigned &FileID,
4756                                      unsigned &LineNum) {
4757 
4758   auto &SM = C.getSourceManager();
4759 
4760   // The loc should be always valid and have a file ID (the user cannot use
4761   // #pragma directives in macros)
4762 
4763   assert(Loc.isValid() && "Source location is expected to be always valid.");
4764   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4765 
4766   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4767   assert(PLoc.isValid() && "Source location is expected to be always valid.");
4768 
4769   llvm::sys::fs::UniqueID ID;
4770   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4771     llvm_unreachable("Source file with target region no longer exists!");
4772 
4773   DeviceID = ID.getDevice();
4774   FileID = ID.getFile();
4775   LineNum = PLoc.getLine();
4776 }
4777 
4778 void CGOpenMPRuntime::emitTargetOutlinedFunction(
4779     const OMPExecutableDirective &D, StringRef ParentName,
4780     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4781     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4782   assert(!ParentName.empty() && "Invalid target region parent name!");
4783 
4784   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
4785                                    IsOffloadEntry, CodeGen);
4786 }
4787 
4788 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4789     const OMPExecutableDirective &D, StringRef ParentName,
4790     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4791     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4792   // Create a unique name for the entry function using the source location
4793   // information of the current target region. The name will be something like:
4794   //
4795   // __omp_offloading_DD_FFFF_PP_lBB
4796   //
4797   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4798   // mangled name of the function that encloses the target region and BB is the
4799   // line number of the target region.
4800 
4801   unsigned DeviceID;
4802   unsigned FileID;
4803   unsigned Line;
4804   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4805                            Line);
4806   SmallString<64> EntryFnName;
4807   {
4808     llvm::raw_svector_ostream OS(EntryFnName);
4809     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4810        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4811   }
4812 
4813   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4814 
4815   CodeGenFunction CGF(CGM, true);
4816   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4817   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4818 
4819   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
4820 
4821   // If this target outline function is not an offload entry, we don't need to
4822   // register it.
4823   if (!IsOffloadEntry)
4824     return;
4825 
4826   // The target region ID is used by the runtime library to identify the current
4827   // target region, so it only has to be unique and not necessarily point to
4828   // anything. It could be the pointer to the outlined function that implements
4829   // the target region, but we aren't using that so that the compiler doesn't
4830   // need to keep that, and could therefore inline the host function if proven
4831   // worthwhile during optimization. In the other hand, if emitting code for the
4832   // device, the ID has to be the function address so that it can retrieved from
4833   // the offloading entry and launched by the runtime library. We also mark the
4834   // outlined function to have external linkage in case we are emitting code for
4835   // the device, because these functions will be entry points to the device.
4836 
4837   if (CGM.getLangOpts().OpenMPIsDevice) {
4838     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4839     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4840   } else
4841     OutlinedFnID = new llvm::GlobalVariable(
4842         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4843         llvm::GlobalValue::PrivateLinkage,
4844         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4845 
4846   // Register the information for the entry associated with this target region.
4847   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4848       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4849 }
4850 
4851 /// discard all CompoundStmts intervening between two constructs
4852 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
4853   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
4854     Body = CS->body_front();
4855 
4856   return Body;
4857 }
4858 
4859 /// \brief Emit the num_teams clause of an enclosed teams directive at the
4860 /// target region scope. If there is no teams directive associated with the
4861 /// target directive, or if there is no num_teams clause associated with the
4862 /// enclosed teams directive, return nullptr.
4863 static llvm::Value *
4864 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4865                                      CodeGenFunction &CGF,
4866                                      const OMPExecutableDirective &D) {
4867 
4868   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4869                                               "teams directive expected to be "
4870                                               "emitted only for the host!");
4871 
4872   // FIXME: For the moment we do not support combined directives with target and
4873   // teams, so we do not expect to get any num_teams clause in the provided
4874   // directive. Once we support that, this assertion can be replaced by the
4875   // actual emission of the clause expression.
4876   assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4877          "Not expecting clause in directive.");
4878 
4879   // If the current target region has a teams region enclosed, we need to get
4880   // the number of teams to pass to the runtime function call. This is done
4881   // by generating the expression in a inlined region. This is required because
4882   // the expression is captured in the enclosing target environment when the
4883   // teams directive is not combined with target.
4884 
4885   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4886 
4887   // FIXME: Accommodate other combined directives with teams when they become
4888   // available.
4889   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4890           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4891     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4892       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4893       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4894       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4895       return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4896                                        /*IsSigned=*/true);
4897     }
4898 
4899     // If we have an enclosed teams directive but no num_teams clause we use
4900     // the default value 0.
4901     return CGF.Builder.getInt32(0);
4902   }
4903 
4904   // No teams associated with the directive.
4905   return nullptr;
4906 }
4907 
4908 /// \brief Emit the thread_limit clause of an enclosed teams directive at the
4909 /// target region scope. If there is no teams directive associated with the
4910 /// target directive, or if there is no thread_limit clause associated with the
4911 /// enclosed teams directive, return nullptr.
4912 static llvm::Value *
4913 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4914                                         CodeGenFunction &CGF,
4915                                         const OMPExecutableDirective &D) {
4916 
4917   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4918                                               "teams directive expected to be "
4919                                               "emitted only for the host!");
4920 
4921   // FIXME: For the moment we do not support combined directives with target and
4922   // teams, so we do not expect to get any thread_limit clause in the provided
4923   // directive. Once we support that, this assertion can be replaced by the
4924   // actual emission of the clause expression.
4925   assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4926          "Not expecting clause in directive.");
4927 
4928   // If the current target region has a teams region enclosed, we need to get
4929   // the thread limit to pass to the runtime function call. This is done
4930   // by generating the expression in a inlined region. This is required because
4931   // the expression is captured in the enclosing target environment when the
4932   // teams directive is not combined with target.
4933 
4934   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4935 
4936   // FIXME: Accommodate other combined directives with teams when they become
4937   // available.
4938   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4939           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4940     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4941       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4942       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4943       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4944       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4945                                        /*IsSigned=*/true);
4946     }
4947 
4948     // If we have an enclosed teams directive but no thread_limit clause we use
4949     // the default value 0.
4950     return CGF.Builder.getInt32(0);
4951   }
4952 
4953   // No teams associated with the directive.
4954   return nullptr;
4955 }
4956 
4957 namespace {
/// \brief Utility to handle information from clauses associated with a given
/// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
/// It provides a convenient interface to obtain the information and generate
/// code for that information.
4962 class MappableExprsHandler {
4963 public:
4964   /// \brief Values for bit flags used to specify the mapping type for
4965   /// offloading.
4966   enum OpenMPOffloadMappingFlags {
4967     /// \brief Allocate memory on the device and move data from host to device.
4968     OMP_MAP_TO = 0x01,
4969     /// \brief Allocate memory on the device and move data from device to host.
4970     OMP_MAP_FROM = 0x02,
4971     /// \brief Always perform the requested mapping action on the element, even
4972     /// if it was already mapped before.
4973     OMP_MAP_ALWAYS = 0x04,
4974     /// \brief Delete the element from the device environment, ignoring the
4975     /// current reference count associated with the element.
4976     OMP_MAP_DELETE = 0x08,
4977     /// \brief The element being mapped is a pointer, therefore the pointee
4978     /// should be mapped as well.
4979     OMP_MAP_IS_PTR = 0x10,
4980     /// \brief This flags signals that an argument is the first one relating to
4981     /// a map/private clause expression. For some cases a single
4982     /// map/privatization results in multiple arguments passed to the runtime
4983     /// library.
4984     OMP_MAP_FIRST_REF = 0x20,
4985     /// \brief Signal that the runtime library has to return the device pointer
4986     /// in the current position for the data being mapped.
4987     OMP_MAP_RETURN_PTR = 0x40,
4988     /// \brief This flag signals that the reference being passed is a pointer to
4989     /// private data.
4990     OMP_MAP_PRIVATE_PTR = 0x80,
4991     /// \brief Pass the element to the device by value.
4992     OMP_MAP_PRIVATE_VAL = 0x100,
4993   };
4994 
4995   /// Class that associates information with a base pointer to be passed to the
4996   /// runtime library.
4997   class BasePointerInfo {
4998     /// The base pointer.
4999     llvm::Value *Ptr = nullptr;
5000     /// The base declaration that refers to this device pointer, or null if
5001     /// there is none.
5002     const ValueDecl *DevPtrDecl = nullptr;
5003 
5004   public:
5005     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
5006         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
5007     llvm::Value *operator*() const { return Ptr; }
5008     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
5009     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
5010   };
5011 
5012   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
5013   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
5014   typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
5015 
5016 private:
  /// \brief Directive from which the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// \brief Function the directive is being generated for. All IR produced by
  /// this handler is emitted through it.
  CodeGenFunction &CGF;

  /// \brief Set of all first private variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
5032 
5033   llvm::Value *getExprTypeSize(const Expr *E) const {
5034     auto ExprTy = E->getType().getCanonicalType();
5035 
5036     // Reference types are ignored for mapping purposes.
5037     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
5038       ExprTy = RefTy->getPointeeType().getCanonicalType();
5039 
5040     // Given that an array section is considered a built-in type, we need to
5041     // do the calculation based on the length of the section instead of relying
5042     // on CGF.getTypeSize(E->getType()).
5043     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
5044       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
5045                             OAE->getBase()->IgnoreParenImpCasts())
5046                             .getCanonicalType();
5047 
5048       // If there is no length associated with the expression, that means we
5049       // are using the whole length of the base.
5050       if (!OAE->getLength() && OAE->getColonLoc().isValid())
5051         return CGF.getTypeSize(BaseTy);
5052 
5053       llvm::Value *ElemSize;
5054       if (auto *PTy = BaseTy->getAs<PointerType>())
5055         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
5056       else {
5057         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
5058         assert(ATy && "Expecting array type if not a pointer type.");
5059         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
5060       }
5061 
5062       // If we don't have a length at this point, that is because we have an
5063       // array section with a single element.
5064       if (!OAE->getLength())
5065         return ElemSize;
5066 
5067       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
5068       LengthVal =
5069           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
5070       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
5071     }
5072     return CGF.getTypeSize(ExprTy);
5073   }
5074 
5075   /// \brief Return the corresponding bits for a given map clause modifier. Add
5076   /// a flag marking the map as a pointer if requested. Add a flag marking the
5077   /// map as the first one of a series of maps that relate to the same map
5078   /// expression.
5079   unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
5080                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
5081                           bool AddIsFirstFlag) const {
5082     unsigned Bits = 0u;
5083     switch (MapType) {
5084     case OMPC_MAP_alloc:
5085     case OMPC_MAP_release:
5086       // alloc and release is the default behavior in the runtime library,  i.e.
5087       // if we don't pass any bits alloc/release that is what the runtime is
5088       // going to do. Therefore, we don't need to signal anything for these two
5089       // type modifiers.
5090       break;
5091     case OMPC_MAP_to:
5092       Bits = OMP_MAP_TO;
5093       break;
5094     case OMPC_MAP_from:
5095       Bits = OMP_MAP_FROM;
5096       break;
5097     case OMPC_MAP_tofrom:
5098       Bits = OMP_MAP_TO | OMP_MAP_FROM;
5099       break;
5100     case OMPC_MAP_delete:
5101       Bits = OMP_MAP_DELETE;
5102       break;
5103     default:
5104       llvm_unreachable("Unexpected map type!");
5105       break;
5106     }
5107     if (AddPtrFlag)
5108       Bits |= OMP_MAP_IS_PTR;
5109     if (AddIsFirstFlag)
5110       Bits |= OMP_MAP_FIRST_REF;
5111     if (MapTypeModifier == OMPC_MAP_always)
5112       Bits |= OMP_MAP_ALWAYS;
5113     return Bits;
5114   }
5115 
5116   /// \brief Return true if the provided expression is a final array section. A
5117   /// final array section, is one whose length can't be proved to be one.
5118   bool isFinalArraySectionExpression(const Expr *E) const {
5119     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
5120 
5121     // It is not an array section and therefore not a unity-size one.
5122     if (!OASE)
5123       return false;
5124 
5125     // An array section with no colon always refer to a single element.
5126     if (OASE->getColonLoc().isInvalid())
5127       return false;
5128 
5129     auto *Length = OASE->getLength();
5130 
5131     // If we don't have a length we have to check if the array has size 1
5132     // for this dimension. Also, we should always expect a length if the
5133     // base type is pointer.
5134     if (!Length) {
5135       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
5136                          OASE->getBase()->IgnoreParenImpCasts())
5137                          .getCanonicalType();
5138       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
5139         return ATy->getSize().getSExtValue() != 1;
5140       // If we don't have a constant dimension length, we have to consider
5141       // the current section as having any size, so it is not necessarily
5142       // unitary. If it happen to be unity size, that's user fault.
5143       return true;
5144     }
5145 
5146     // Check if the length evaluates to 1.
5147     llvm::APSInt ConstLength;
5148     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
5149       return true; // Can have more that size 1.
5150 
5151     return ConstLength.getSExtValue() != 1;
5152   }
5153 
5154   /// \brief Generate the base pointers, section pointers, sizes and map type
5155   /// bits for the provided map type, map modifier, and expression components.
5156   /// \a IsFirstComponent should be set to true if the provided set of
5157   /// components is the first associated with a capture.
5158   void generateInfoForComponentList(
5159       OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
5160       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
5161       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
5162       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
5163       bool IsFirstComponentList) const {
5164 
5165     // The following summarizes what has to be generated for each map and the
5166     // types bellow. The generated information is expressed in this order:
5167     // base pointer, section pointer, size, flags
5168     // (to add to the ones that come from the map type and modifier).
5169     //
5170     // double d;
5171     // int i[100];
5172     // float *p;
5173     //
5174     // struct S1 {
5175     //   int i;
5176     //   float f[50];
5177     // }
5178     // struct S2 {
5179     //   int i;
5180     //   float f[50];
5181     //   S1 s;
5182     //   double *p;
5183     //   struct S2 *ps;
5184     // }
5185     // S2 s;
5186     // S2 *ps;
5187     //
5188     // map(d)
5189     // &d, &d, sizeof(double), noflags
5190     //
5191     // map(i)
5192     // &i, &i, 100*sizeof(int), noflags
5193     //
5194     // map(i[1:23])
5195     // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
5196     //
5197     // map(p)
5198     // &p, &p, sizeof(float*), noflags
5199     //
5200     // map(p[1:24])
5201     // p, &p[1], 24*sizeof(float), noflags
5202     //
5203     // map(s)
5204     // &s, &s, sizeof(S2), noflags
5205     //
5206     // map(s.i)
5207     // &s, &(s.i), sizeof(int), noflags
5208     //
5209     // map(s.s.f)
5210     // &s, &(s.i.f), 50*sizeof(int), noflags
5211     //
5212     // map(s.p)
5213     // &s, &(s.p), sizeof(double*), noflags
5214     //
5215     // map(s.p[:22], s.a s.b)
5216     // &s, &(s.p), sizeof(double*), noflags
5217     // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
5218     //
5219     // map(s.ps)
5220     // &s, &(s.ps), sizeof(S2*), noflags
5221     //
5222     // map(s.ps->s.i)
5223     // &s, &(s.ps), sizeof(S2*), noflags
5224     // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
5225     //
5226     // map(s.ps->ps)
5227     // &s, &(s.ps), sizeof(S2*), noflags
5228     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5229     //
5230     // map(s.ps->ps->ps)
5231     // &s, &(s.ps), sizeof(S2*), noflags
5232     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5233     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5234     //
5235     // map(s.ps->ps->s.f[:22])
5236     // &s, &(s.ps), sizeof(S2*), noflags
5237     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5238     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
5239     //
5240     // map(ps)
5241     // &ps, &ps, sizeof(S2*), noflags
5242     //
5243     // map(ps->i)
5244     // ps, &(ps->i), sizeof(int), noflags
5245     //
5246     // map(ps->s.f)
5247     // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
5248     //
5249     // map(ps->p)
5250     // ps, &(ps->p), sizeof(double*), noflags
5251     //
5252     // map(ps->p[:22])
5253     // ps, &(ps->p), sizeof(double*), noflags
5254     // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
5255     //
5256     // map(ps->ps)
5257     // ps, &(ps->ps), sizeof(S2*), noflags
5258     //
5259     // map(ps->ps->s.i)
5260     // ps, &(ps->ps), sizeof(S2*), noflags
5261     // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
5262     //
5263     // map(ps->ps->ps)
5264     // ps, &(ps->ps), sizeof(S2*), noflags
5265     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5266     //
5267     // map(ps->ps->ps->ps)
5268     // ps, &(ps->ps), sizeof(S2*), noflags
5269     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5270     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5271     //
5272     // map(ps->ps->ps->s.f[:22])
5273     // ps, &(ps->ps), sizeof(S2*), noflags
5274     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5275     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
5276     // extra_flag
5277 
5278     // Track if the map information being generated is the first for a capture.
5279     bool IsCaptureFirstInfo = IsFirstComponentList;
5280 
5281     // Scan the components from the base to the complete expression.
5282     auto CI = Components.rbegin();
5283     auto CE = Components.rend();
5284     auto I = CI;
5285 
5286     // Track if the map information being generated is the first for a list of
5287     // components.
5288     bool IsExpressionFirstInfo = true;
5289     llvm::Value *BP = nullptr;
5290 
5291     if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
5292       // The base is the 'this' pointer. The content of the pointer is going
5293       // to be the base of the field being mapped.
5294       BP = CGF.EmitScalarExpr(ME->getBase());
5295     } else {
5296       // The base is the reference to the variable.
5297       // BP = &Var.
5298       BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
5299                .getPointer();
5300 
5301       // If the variable is a pointer and is being dereferenced (i.e. is not
5302       // the last component), the base has to be the pointer itself, not its
5303       // reference. References are ignored for mapping purposes.
5304       QualType Ty =
5305           I->getAssociatedDeclaration()->getType().getNonReferenceType();
5306       if (Ty->isAnyPointerType() && std::next(I) != CE) {
5307         auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
5308         BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
5309                                          Ty->castAs<PointerType>())
5310                  .getPointer();
5311 
5312         // We do not need to generate individual map information for the
5313         // pointer, it can be associated with the combined storage.
5314         ++I;
5315       }
5316     }
5317 
5318     for (; I != CE; ++I) {
5319       auto Next = std::next(I);
5320 
5321       // We need to generate the addresses and sizes if this is the last
5322       // component, if the component is a pointer or if it is an array section
5323       // whose length can't be proved to be one. If this is a pointer, it
5324       // becomes the base address for the following components.
5325 
5326       // A final array section, is one whose length can't be proved to be one.
5327       bool IsFinalArraySection =
5328           isFinalArraySectionExpression(I->getAssociatedExpression());
5329 
5330       // Get information on whether the element is a pointer. Have to do a
5331       // special treatment for array sections given that they are built-in
5332       // types.
5333       const auto *OASE =
5334           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
5335       bool IsPointer =
5336           (OASE &&
5337            OMPArraySectionExpr::getBaseOriginalType(OASE)
5338                .getCanonicalType()
5339                ->isAnyPointerType()) ||
5340           I->getAssociatedExpression()->getType()->isAnyPointerType();
5341 
5342       if (Next == CE || IsPointer || IsFinalArraySection) {
5343 
5344         // If this is not the last component, we expect the pointer to be
5345         // associated with an array expression or member expression.
5346         assert((Next == CE ||
5347                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
5348                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
5349                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
5350                "Unexpected expression");
5351 
5352         auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
5353         auto *Size = getExprTypeSize(I->getAssociatedExpression());
5354 
5355         // If we have a member expression and the current component is a
5356         // reference, we have to map the reference too. Whenever we have a
5357         // reference, the section that reference refers to is going to be a
5358         // load instruction from the storage assigned to the reference.
5359         if (isa<MemberExpr>(I->getAssociatedExpression()) &&
5360             I->getAssociatedDeclaration()->getType()->isReferenceType()) {
5361           auto *LI = cast<llvm::LoadInst>(LB);
5362           auto *RefAddr = LI->getPointerOperand();
5363 
5364           BasePointers.push_back(BP);
5365           Pointers.push_back(RefAddr);
5366           Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
5367           Types.push_back(getMapTypeBits(
5368               /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
5369               !IsExpressionFirstInfo, IsCaptureFirstInfo));
5370           IsExpressionFirstInfo = false;
5371           IsCaptureFirstInfo = false;
5372           // The reference will be the next base address.
5373           BP = RefAddr;
5374         }
5375 
5376         BasePointers.push_back(BP);
5377         Pointers.push_back(LB);
5378         Sizes.push_back(Size);
5379 
5380         // We need to add a pointer flag for each map that comes from the
5381         // same expression except for the first one. We also need to signal
5382         // this map is the first one that relates with the current capture
5383         // (there is a set of entries for each capture).
5384         Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
5385                                        !IsExpressionFirstInfo,
5386                                        IsCaptureFirstInfo));
5387 
5388         // If we have a final array section, we are done with this expression.
5389         if (IsFinalArraySection)
5390           break;
5391 
5392         // The pointer becomes the base for the next element.
5393         if (Next != CE)
5394           BP = LB;
5395 
5396         IsExpressionFirstInfo = false;
5397         IsCaptureFirstInfo = false;
5398         continue;
5399       }
5400     }
5401   }
5402 
5403   /// \brief Return the adjusted map modifiers if the declaration a capture
5404   /// refers to appears in a first-private clause. This is expected to be used
5405   /// only with directives that start with 'target'.
5406   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
5407                                                unsigned CurrentModifiers) {
5408     assert(Cap.capturesVariable() && "Expected capture by reference only!");
5409 
5410     // A first private variable captured by reference will use only the
5411     // 'private ptr' and 'map to' flag. Return the right flags if the captured
5412     // declaration is known as first-private in this handler.
5413     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
5414       return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
5415              MappableExprsHandler::OMP_MAP_TO;
5416 
5417     // We didn't modify anything.
5418     return CurrentModifiers;
5419   }
5420 
5421 public:
5422   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
5423       : CurDir(Dir), CGF(CGF) {
5424     // Extract firstprivate clause information.
5425     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
5426       for (const auto *D : C->varlists())
5427         FirstPrivateDecls.insert(
5428             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
5429     // Extract device pointer clause information.
5430     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
5431       for (auto L : C->component_lists())
5432         DevPointersMap[L.first].push_back(L.second);
5433   }
5434 
5435   /// \brief Generate all the base pointers, section pointers, sizes and map
5436   /// types for the extracted mappable expressions. Also, for each item that
5437   /// relates with a device pointer, a pair of the relevant declaration and
5438   /// index where it occurs is appended to the device pointers info array.
5439   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
5440                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
5441                        MapFlagsArrayTy &Types) const {
5442     BasePointers.clear();
5443     Pointers.clear();
5444     Sizes.clear();
5445     Types.clear();
5446 
5447     struct MapInfo {
5448       /// Kind that defines how a device pointer has to be returned.
5449       enum ReturnPointerKind {
5450         // Don't have to return any pointer.
5451         RPK_None,
5452         // Pointer is the base of the declaration.
5453         RPK_Base,
5454         // Pointer is a member of the base declaration - 'this'
5455         RPK_Member,
5456         // Pointer is a reference and a member of the base declaration - 'this'
5457         RPK_MemberReference,
5458       };
5459       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
5460       OpenMPMapClauseKind MapType;
5461       OpenMPMapClauseKind MapTypeModifier;
5462       ReturnPointerKind ReturnDevicePointer;
5463 
5464       MapInfo()
5465           : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
5466             ReturnDevicePointer(RPK_None) {}
5467       MapInfo(
5468           OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
5469           OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
5470           ReturnPointerKind ReturnDevicePointer)
5471           : Components(Components), MapType(MapType),
5472             MapTypeModifier(MapTypeModifier),
5473             ReturnDevicePointer(ReturnDevicePointer) {}
5474     };
5475 
5476     // We have to process the component lists that relate with the same
5477     // declaration in a single chunk so that we can generate the map flags
5478     // correctly. Therefore, we organize all lists in a map.
5479     llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
5480 
5481     // Helper function to fill the information map for the different supported
5482     // clauses.
5483     auto &&InfoGen = [&Info](
5484         const ValueDecl *D,
5485         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
5486         OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
5487         MapInfo::ReturnPointerKind ReturnDevicePointer) {
5488       const ValueDecl *VD =
5489           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
5490       Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
5491     };
5492 
5493     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
5494     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
5495       for (auto L : C->component_lists())
5496         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
5497                 MapInfo::RPK_None);
5498     for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
5499       for (auto L : C->component_lists())
5500         InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
5501                 MapInfo::RPK_None);
5502     for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
5503       for (auto L : C->component_lists())
5504         InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
5505                 MapInfo::RPK_None);
5506 
5507     // Look at the use_device_ptr clause information and mark the existing map
5508     // entries as such. If there is no map information for an entry in the
5509     // use_device_ptr list, we create one with map type 'alloc' and zero size
5510     // section. It is the user fault if that was not mapped before.
5511     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
5512     for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
5513       for (auto L : C->component_lists()) {
5514         assert(!L.second.empty() && "Not expecting empty list of components!");
5515         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
5516         VD = cast<ValueDecl>(VD->getCanonicalDecl());
5517         auto *IE = L.second.back().getAssociatedExpression();
5518         // If the first component is a member expression, we have to look into
5519         // 'this', which maps to null in the map of map information. Otherwise
5520         // look directly for the information.
5521         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
5522 
5523         // We potentially have map information for this declaration already.
5524         // Look for the first set of components that refer to it.
5525         if (It != Info.end()) {
5526           auto CI = std::find_if(
5527               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
5528                 return MI.Components.back().getAssociatedDeclaration() == VD;
5529               });
5530           // If we found a map entry, signal that the pointer has to be returned
5531           // and move on to the next declaration.
5532           if (CI != It->second.end()) {
5533             CI->ReturnDevicePointer = isa<MemberExpr>(IE)
5534                                           ? (VD->getType()->isReferenceType()
5535                                                  ? MapInfo::RPK_MemberReference
5536                                                  : MapInfo::RPK_Member)
5537                                           : MapInfo::RPK_Base;
5538             continue;
5539           }
5540         }
5541 
5542         // We didn't find any match in our map information - generate a zero
5543         // size array section.
5544         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
5545         llvm::Value *Ptr =
5546             this->CGF
5547                 .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
5548                 .getScalarVal();
5549         BasePointers.push_back({Ptr, VD});
5550         Pointers.push_back(Ptr);
5551         Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
5552         Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
5553       }
5554 
5555     for (auto &M : Info) {
5556       // We need to know when we generate information for the first component
5557       // associated with a capture, because the mapping flags depend on it.
5558       bool IsFirstComponentList = true;
5559       for (MapInfo &L : M.second) {
5560         assert(!L.Components.empty() &&
5561                "Not expecting declaration with no component lists.");
5562 
5563         // Remember the current base pointer index.
5564         unsigned CurrentBasePointersIdx = BasePointers.size();
5565         // FIXME: MSVC 2013 seems to require this-> to find the member method.
5566         this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
5567                                            L.Components, BasePointers, Pointers,
5568                                            Sizes, Types, IsFirstComponentList);
5569 
5570         // If this entry relates with a device pointer, set the relevant
5571         // declaration and add the 'return pointer' flag.
5572         if (IsFirstComponentList &&
5573             L.ReturnDevicePointer != MapInfo::RPK_None) {
5574           // If the pointer is not the base of the map, we need to skip the
5575           // base. If it is a reference in a member field, we also need to skip
5576           // the map of the reference.
5577           if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
5578             ++CurrentBasePointersIdx;
5579             if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
5580               ++CurrentBasePointersIdx;
5581           }
5582           assert(BasePointers.size() > CurrentBasePointersIdx &&
5583                  "Unexpected number of mapped base pointers.");
5584 
5585           auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
5586           assert(RelevantVD &&
5587                  "No relevant declaration related with device pointer??");
5588 
5589           BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
5590           Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
5591         }
5592         IsFirstComponentList = false;
5593       }
5594     }
5595   }
5596 
5597   /// \brief Generate the base pointers, section pointers, sizes and map types
5598   /// associated to a given capture.
5599   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
5600                               llvm::Value *Arg,
5601                               MapBaseValuesArrayTy &BasePointers,
5602                               MapValuesArrayTy &Pointers,
5603                               MapValuesArrayTy &Sizes,
5604                               MapFlagsArrayTy &Types) const {
5605     assert(!Cap->capturesVariableArrayType() &&
5606            "Not expecting to generate map info for a variable array type!");
5607 
5608     BasePointers.clear();
5609     Pointers.clear();
5610     Sizes.clear();
5611     Types.clear();
5612 
5613     // We need to know when we generating information for the first component
5614     // associated with a capture, because the mapping flags depend on it.
5615     bool IsFirstComponentList = true;
5616 
5617     const ValueDecl *VD =
5618         Cap->capturesThis()
5619             ? nullptr
5620             : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
5621 
5622     // If this declaration appears in a is_device_ptr clause we just have to
5623     // pass the pointer by value. If it is a reference to a declaration, we just
5624     // pass its value, otherwise, if it is a member expression, we need to map
5625     // 'to' the field.
5626     if (!VD) {
5627       auto It = DevPointersMap.find(VD);
5628       if (It != DevPointersMap.end()) {
5629         for (auto L : It->second) {
5630           generateInfoForComponentList(
5631               /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
5632               BasePointers, Pointers, Sizes, Types, IsFirstComponentList);
5633           IsFirstComponentList = false;
5634         }
5635         return;
5636       }
5637     } else if (DevPointersMap.count(VD)) {
5638       BasePointers.push_back({Arg, VD});
5639       Pointers.push_back(Arg);
5640       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
5641       Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
5642       return;
5643     }
5644 
5645     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
5646     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
5647       for (auto L : C->decl_component_lists(VD)) {
5648         assert(L.first == VD &&
5649                "We got information for the wrong declaration??");
5650         assert(!L.second.empty() &&
5651                "Not expecting declaration with no component lists.");
5652         generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
5653                                      L.second, BasePointers, Pointers, Sizes,
5654                                      Types, IsFirstComponentList);
5655         IsFirstComponentList = false;
5656       }
5657 
5658     return;
5659   }
5660 
5661   /// \brief Generate the default map information for a given capture \a CI,
5662   /// record field declaration \a RI and captured value \a CV.
5663   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
5664                               const FieldDecl &RI, llvm::Value *CV,
5665                               MapBaseValuesArrayTy &CurBasePointers,
5666                               MapValuesArrayTy &CurPointers,
5667                               MapValuesArrayTy &CurSizes,
5668                               MapFlagsArrayTy &CurMapTypes) {
5669 
5670     // Do the default mapping.
5671     if (CI.capturesThis()) {
5672       CurBasePointers.push_back(CV);
5673       CurPointers.push_back(CV);
5674       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
5675       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
5676       // Default map type.
5677       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
5678     } else if (CI.capturesVariableByCopy()) {
5679       CurBasePointers.push_back(CV);
5680       CurPointers.push_back(CV);
5681       if (!RI.getType()->isAnyPointerType()) {
5682         // We have to signal to the runtime captures passed by value that are
5683         // not pointers.
5684         CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
5685         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
5686       } else {
5687         // Pointers are implicitly mapped with a zero size and no flags
5688         // (other than first map that is added for all implicit maps).
5689         CurMapTypes.push_back(0u);
5690         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
5691       }
5692     } else {
5693       assert(CI.capturesVariable() && "Expected captured reference.");
5694       CurBasePointers.push_back(CV);
5695       CurPointers.push_back(CV);
5696 
5697       const ReferenceType *PtrTy =
5698           cast<ReferenceType>(RI.getType().getTypePtr());
5699       QualType ElementType = PtrTy->getPointeeType();
5700       CurSizes.push_back(CGF.getTypeSize(ElementType));
5701       // The default map type for a scalar/complex type is 'to' because by
5702       // default the value doesn't have to be retrieved. For an aggregate
5703       // type, the default is 'tofrom'.
5704       CurMapTypes.push_back(ElementType->isAggregateType()
5705                                 ? (OMP_MAP_TO | OMP_MAP_FROM)
5706                                 : OMP_MAP_TO);
5707 
5708       // If we have a capture by reference we may need to add the private
5709       // pointer flag if the base declaration shows in some first-private
5710       // clause.
5711       CurMapTypes.back() =
5712           adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
5713     }
5714     // Every default map produces a single argument, so, it is always the
5715     // first one.
5716     CurMapTypes.back() |= OMP_MAP_FIRST_REF;
5717   }
5718 };
5719 
/// \brief Device IDs with a reserved meaning that code generation passes to
/// the offloading runtime library.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec. emitTargetCall passes this
  /// value when it is given no device expression.
  OMP_DEVICEID_UNDEF = -1,
};
5725 } // anonymous namespace
5726 
5727 /// \brief Emit the arrays used to pass the captures and map information to the
5728 /// offloading runtime library. If there is no map or capture information,
5729 /// return nullptr by reference.
5730 static void
5731 emitOffloadingArrays(CodeGenFunction &CGF,
5732                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
5733                      MappableExprsHandler::MapValuesArrayTy &Pointers,
5734                      MappableExprsHandler::MapValuesArrayTy &Sizes,
5735                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
5736                      CGOpenMPRuntime::TargetDataInfo &Info) {
5737   auto &CGM = CGF.CGM;
5738   auto &Ctx = CGF.getContext();
5739 
5740   // Reset the array information.
5741   Info.clearArrayInfo();
5742   Info.NumberOfPtrs = BasePointers.size();
5743 
5744   if (Info.NumberOfPtrs) {
5745     // Detect if we have any capture size requiring runtime evaluation of the
5746     // size so that a constant array could be eventually used.
5747     bool hasRuntimeEvaluationCaptureSize = false;
5748     for (auto *S : Sizes)
5749       if (!isa<llvm::Constant>(S)) {
5750         hasRuntimeEvaluationCaptureSize = true;
5751         break;
5752       }
5753 
5754     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
5755     QualType PointerArrayType =
5756         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
5757                                  /*IndexTypeQuals=*/0);
5758 
5759     Info.BasePointersArray =
5760         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
5761     Info.PointersArray =
5762         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
5763 
5764     // If we don't have any VLA types or other types that require runtime
5765     // evaluation, we can use a constant array for the map sizes, otherwise we
5766     // need to fill up the arrays as we do for the pointers.
5767     if (hasRuntimeEvaluationCaptureSize) {
5768       QualType SizeArrayType = Ctx.getConstantArrayType(
5769           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
5770           /*IndexTypeQuals=*/0);
5771       Info.SizesArray =
5772           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
5773     } else {
5774       // We expect all the sizes to be constant, so we collect them to create
5775       // a constant array.
5776       SmallVector<llvm::Constant *, 16> ConstSizes;
5777       for (auto S : Sizes)
5778         ConstSizes.push_back(cast<llvm::Constant>(S));
5779 
5780       auto *SizesArrayInit = llvm::ConstantArray::get(
5781           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
5782       auto *SizesArrayGbl = new llvm::GlobalVariable(
5783           CGM.getModule(), SizesArrayInit->getType(),
5784           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5785           SizesArrayInit, ".offload_sizes");
5786       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5787       Info.SizesArray = SizesArrayGbl;
5788     }
5789 
5790     // The map types are always constant so we don't need to generate code to
5791     // fill arrays. Instead, we create an array constant.
5792     llvm::Constant *MapTypesArrayInit =
5793         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
5794     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
5795         CGM.getModule(), MapTypesArrayInit->getType(),
5796         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5797         MapTypesArrayInit, ".offload_maptypes");
5798     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5799     Info.MapTypesArray = MapTypesArrayGbl;
5800 
5801     for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
5802       llvm::Value *BPVal = *BasePointers[i];
5803       if (BPVal->getType()->isPointerTy())
5804         BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
5805       else {
5806         assert(BPVal->getType()->isIntegerTy() &&
5807                "If not a pointer, the value type must be an integer.");
5808         BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
5809       }
5810       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
5811           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5812           Info.BasePointersArray, 0, i);
5813       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5814       CGF.Builder.CreateStore(BPVal, BPAddr);
5815 
5816       if (Info.requiresDevicePointerInfo())
5817         if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
5818           Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
5819 
5820       llvm::Value *PVal = Pointers[i];
5821       if (PVal->getType()->isPointerTy())
5822         PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
5823       else {
5824         assert(PVal->getType()->isIntegerTy() &&
5825                "If not a pointer, the value type must be an integer.");
5826         PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
5827       }
5828       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
5829           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5830           Info.PointersArray, 0, i);
5831       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5832       CGF.Builder.CreateStore(PVal, PAddr);
5833 
5834       if (hasRuntimeEvaluationCaptureSize) {
5835         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
5836             llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
5837             Info.SizesArray,
5838             /*Idx0=*/0,
5839             /*Idx1=*/i);
5840         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
5841         CGF.Builder.CreateStore(
5842             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
5843             SAddr);
5844       }
5845     }
5846   }
5847 }
5848 /// \brief Emit the arguments to be passed to the runtime library based on the
5849 /// arrays of pointers, sizes and map types.
5850 static void emitOffloadingArraysArgument(
5851     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
5852     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
5853     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
5854   auto &CGM = CGF.CGM;
5855   if (Info.NumberOfPtrs) {
5856     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5857         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5858         Info.BasePointersArray,
5859         /*Idx0=*/0, /*Idx1=*/0);
5860     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5861         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5862         Info.PointersArray,
5863         /*Idx0=*/0,
5864         /*Idx1=*/0);
5865     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5866         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
5867         /*Idx0=*/0, /*Idx1=*/0);
5868     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5869         llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
5870         Info.MapTypesArray,
5871         /*Idx0=*/0,
5872         /*Idx1=*/0);
5873   } else {
5874     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5875     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5876     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
5877     MapTypesArrayArg =
5878         llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
5879   }
5880 }
5881 
5882 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
5883                                      const OMPExecutableDirective &D,
5884                                      llvm::Value *OutlinedFn,
5885                                      llvm::Value *OutlinedFnID,
5886                                      const Expr *IfCond, const Expr *Device,
5887                                      ArrayRef<llvm::Value *> CapturedVars) {
5888   if (!CGF.HaveInsertPoint())
5889     return;
5890 
5891   assert(OutlinedFn && "Invalid outlined function!");
5892 
5893   auto &Ctx = CGF.getContext();
5894 
5895   // Fill up the arrays with all the captured variables.
5896   MappableExprsHandler::MapValuesArrayTy KernelArgs;
5897   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
5898   MappableExprsHandler::MapValuesArrayTy Pointers;
5899   MappableExprsHandler::MapValuesArrayTy Sizes;
5900   MappableExprsHandler::MapFlagsArrayTy MapTypes;
5901 
5902   MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
5903   MappableExprsHandler::MapValuesArrayTy CurPointers;
5904   MappableExprsHandler::MapValuesArrayTy CurSizes;
5905   MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
5906 
5907   // Get mappable expression information.
5908   MappableExprsHandler MEHandler(D, CGF);
5909 
5910   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5911   auto RI = CS.getCapturedRecordDecl()->field_begin();
5912   auto CV = CapturedVars.begin();
5913   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
5914                                             CE = CS.capture_end();
5915        CI != CE; ++CI, ++RI, ++CV) {
5916     StringRef Name;
5917     QualType Ty;
5918 
5919     CurBasePointers.clear();
5920     CurPointers.clear();
5921     CurSizes.clear();
5922     CurMapTypes.clear();
5923 
5924     // VLA sizes are passed to the outlined region by copy and do not have map
5925     // information associated.
5926     if (CI->capturesVariableArrayType()) {
5927       CurBasePointers.push_back(*CV);
5928       CurPointers.push_back(*CV);
5929       CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5930       // Copy to the device as an argument. No need to retrieve it.
5931       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
5932                             MappableExprsHandler::OMP_MAP_FIRST_REF);
5933     } else {
5934       // If we have any information in the map clause, we use it, otherwise we
5935       // just do a default mapping.
5936       MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
5937                                        CurSizes, CurMapTypes);
5938       if (CurBasePointers.empty())
5939         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
5940                                          CurPointers, CurSizes, CurMapTypes);
5941     }
5942     // We expect to have at least an element of information for this capture.
5943     assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
5944     assert(CurBasePointers.size() == CurPointers.size() &&
5945            CurBasePointers.size() == CurSizes.size() &&
5946            CurBasePointers.size() == CurMapTypes.size() &&
5947            "Inconsistent map information sizes!");
5948 
5949     // The kernel args are always the first elements of the base pointers
5950     // associated with a capture.
5951     KernelArgs.push_back(*CurBasePointers.front());
5952     // We need to append the results of this capture to what we already have.
5953     BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
5954     Pointers.append(CurPointers.begin(), CurPointers.end());
5955     Sizes.append(CurSizes.begin(), CurSizes.end());
5956     MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
5957   }
5958 
  // Keep track of whether the host function has to be executed.
5960   auto OffloadErrorQType =
5961       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
5962   auto OffloadError = CGF.MakeAddrLValue(
5963       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
5964       OffloadErrorQType);
5965   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
5966                         OffloadError);
5967 
5968   // Fill up the pointer arrays and transfer execution to the device.
5969   auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
5970                     OutlinedFnID, OffloadError, OffloadErrorQType,
5971                     &D](CodeGenFunction &CGF, PrePostActionTy &) {
5972     auto &RT = CGF.CGM.getOpenMPRuntime();
5973     // Emit the offloading arrays.
5974     TargetDataInfo Info;
5975     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
5976     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
5977                                  Info.PointersArray, Info.SizesArray,
5978                                  Info.MapTypesArray, Info);
5979 
5980     // On top of the arrays that were filled up, the target offloading call
5981     // takes as arguments the device id as well as the host pointer. The host
5982     // pointer is used by the runtime library to identify the current target
5983     // region, so it only has to be unique and not necessarily point to
5984     // anything. It could be the pointer to the outlined function that
5985     // implements the target region, but we aren't using that so that the
5986     // compiler doesn't need to keep that, and could therefore inline the host
5987     // function if proven worthwhile during optimization.
5988 
5989     // From this point on, we need to have an ID of the target region defined.
5990     assert(OutlinedFnID && "Invalid outlined function ID!");
5991 
5992     // Emit device ID if any.
5993     llvm::Value *DeviceID;
5994     if (Device)
5995       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5996                                            CGF.Int32Ty, /*isSigned=*/true);
5997     else
5998       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
5999 
6000     // Emit the number of elements in the offloading arrays.
6001     llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6002 
6003     // Return value of the runtime offloading call.
6004     llvm::Value *Return;
6005 
6006     auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
6007     auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
6008 
6009     // If we have NumTeams defined this means that we have an enclosed teams
6010     // region. Therefore we also expect to have ThreadLimit defined. These two
6011     // values should be defined in the presence of a teams directive, regardless
6012     // of having any clauses associated. If the user is using teams but no
6013     // clauses, these two values will be the default that should be passed to
6014     // the runtime library - a 32-bit integer with the value zero.
6015     if (NumTeams) {
6016       assert(ThreadLimit && "Thread limit expression should be available along "
6017                             "with number of teams.");
6018       llvm::Value *OffloadingArgs[] = {
6019           DeviceID,           OutlinedFnID,
6020           PointerNum,         Info.BasePointersArray,
6021           Info.PointersArray, Info.SizesArray,
6022           Info.MapTypesArray, NumTeams,
6023           ThreadLimit};
6024       Return = CGF.EmitRuntimeCall(
6025           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
6026     } else {
6027       llvm::Value *OffloadingArgs[] = {
6028           DeviceID,           OutlinedFnID,
6029           PointerNum,         Info.BasePointersArray,
6030           Info.PointersArray, Info.SizesArray,
6031           Info.MapTypesArray};
6032       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
6033                                    OffloadingArgs);
6034     }
6035 
6036     CGF.EmitStoreOfScalar(Return, OffloadError);
6037   };
6038 
6039   // Notify that the host version must be executed.
6040   auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
6041     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
6042                           OffloadError);
6043   };
6044 
6045   // If we have a target function ID it means that we need to support
6046   // offloading, otherwise, just execute on the host. We need to execute on host
6047   // regardless of the conditional in the if clause if, e.g., the user do not
6048   // specify target triples.
6049   if (OutlinedFnID) {
6050     if (IfCond)
6051       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6052     else {
6053       RegionCodeGenTy ThenRCG(ThenGen);
6054       ThenRCG(CGF);
6055     }
6056   } else {
6057     RegionCodeGenTy ElseRCG(ElseGen);
6058     ElseRCG(CGF);
6059   }
6060 
6061   // Check the error code and execute the host version if required.
6062   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
6063   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
6064   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
6065   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
6066   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
6067 
6068   CGF.EmitBlock(OffloadFailedBlock);
6069   CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
6070   CGF.EmitBranch(OffloadContBlock);
6071 
6072   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
6073 }
6074 
6075 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
6076                                                     StringRef ParentName) {
6077   if (!S)
6078     return;
6079 
6080   // If we find a OMP target directive, codegen the outline function and
6081   // register the result.
6082   // FIXME: Add other directives with target when they become supported.
6083   bool isTargetDirective = isa<OMPTargetDirective>(S);
6084 
6085   if (isTargetDirective) {
6086     auto *E = cast<OMPExecutableDirective>(S);
6087     unsigned DeviceID;
6088     unsigned FileID;
6089     unsigned Line;
6090     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
6091                              FileID, Line);
6092 
6093     // Is this a target region that should not be emitted as an entry point? If
6094     // so just signal we are done with this target region.
6095     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
6096                                                             ParentName, Line))
6097       return;
6098 
6099     llvm::Function *Fn;
6100     llvm::Constant *Addr;
6101     std::tie(Fn, Addr) =
6102         CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
6103             CGM, cast<OMPTargetDirective>(*E), ParentName,
6104             /*isOffloadEntry=*/true);
6105     assert(Fn && Addr && "Target region emission failed.");
6106     return;
6107   }
6108 
6109   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
6110     if (!E->hasAssociatedStmt())
6111       return;
6112 
6113     scanForTargetRegionsFunctions(
6114         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
6115         ParentName);
6116     return;
6117   }
6118 
6119   // If this is a lambda function, look into its body.
6120   if (auto *L = dyn_cast<LambdaExpr>(S))
6121     S = L->getBody();
6122 
6123   // Keep looking for target regions recursively.
6124   for (auto *II : S->children())
6125     scanForTargetRegionsFunctions(II, ParentName);
6126 }
6127 
6128 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
6129   auto &FD = *cast<FunctionDecl>(GD.getDecl());
6130 
6131   // If emitting code for the host, we do not process FD here. Instead we do
6132   // the normal code generation.
6133   if (!CGM.getLangOpts().OpenMPIsDevice)
6134     return false;
6135 
6136   // Try to detect target regions in the function.
6137   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
6138 
6139   // We should not emit any function othen that the ones created during the
6140   // scanning. Therefore, we signal that this function is completely dealt
6141   // with.
6142   return true;
6143 }
6144 
6145 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
6146   if (!CGM.getLangOpts().OpenMPIsDevice)
6147     return false;
6148 
6149   // Check if there are Ctors/Dtors in this declaration and look for target
6150   // regions in it. We use the complete variant to produce the kernel name
6151   // mangling.
6152   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
6153   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
6154     for (auto *Ctor : RD->ctors()) {
6155       StringRef ParentName =
6156           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
6157       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
6158     }
6159     auto *Dtor = RD->getDestructor();
6160     if (Dtor) {
6161       StringRef ParentName =
6162           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
6163       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
6164     }
6165   }
6166 
6167   // If we are in target mode we do not emit any global (declare target is not
6168   // implemented yet). Therefore we signal that GD was processed in this case.
6169   return true;
6170 }
6171 
6172 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
6173   auto *VD = GD.getDecl();
6174   if (isa<FunctionDecl>(VD))
6175     return emitTargetFunctions(GD);
6176 
6177   return emitTargetGlobalVariable(GD);
6178 }
6179 
6180 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
6181   // If we have offloading in the current module, we need to emit the entries
6182   // now and register the offloading descriptor.
6183   createOffloadEntriesAndInfoMetadata();
6184 
6185   // Create and register the offloading binary descriptors. This is the main
6186   // entity that captures all the information about offloading in the current
6187   // compilation unit.
6188   return createOffloadingBinaryDescriptorRegistration();
6189 }
6190 
6191 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
6192                                     const OMPExecutableDirective &D,
6193                                     SourceLocation Loc,
6194                                     llvm::Value *OutlinedFn,
6195                                     ArrayRef<llvm::Value *> CapturedVars) {
6196   if (!CGF.HaveInsertPoint())
6197     return;
6198 
6199   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6200   CodeGenFunction::RunCleanupsScope Scope(CGF);
6201 
6202   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
6203   llvm::Value *Args[] = {
6204       RTLoc,
6205       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
6206       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
6207   llvm::SmallVector<llvm::Value *, 16> RealArgs;
6208   RealArgs.append(std::begin(Args), std::end(Args));
6209   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
6210 
6211   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
6212   CGF.EmitRuntimeCall(RTLFn, RealArgs);
6213 }
6214 
6215 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
6216                                          const Expr *NumTeams,
6217                                          const Expr *ThreadLimit,
6218                                          SourceLocation Loc) {
6219   if (!CGF.HaveInsertPoint())
6220     return;
6221 
6222   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6223 
6224   llvm::Value *NumTeamsVal =
6225       (NumTeams)
6226           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
6227                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6228           : CGF.Builder.getInt32(0);
6229 
6230   llvm::Value *ThreadLimitVal =
6231       (ThreadLimit)
6232           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
6233                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6234           : CGF.Builder.getInt32(0);
6235 
6236   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
6237   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
6238                                      ThreadLimitVal};
6239   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
6240                       PushNumTeamsArgs);
6241 }
6242 
6243 void CGOpenMPRuntime::emitTargetDataCalls(
6244     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6245     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
6246   if (!CGF.HaveInsertPoint())
6247     return;
6248 
6249   // Action used to replace the default codegen action and turn privatization
6250   // off.
6251   PrePostActionTy NoPrivAction;
6252 
6253   // Generate the code for the opening of the data environment. Capture all the
6254   // arguments of the runtime call by reference because they are used in the
6255   // closing of the region.
6256   auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
6257       CodeGenFunction &CGF, PrePostActionTy &) {
6258     // Fill up the arrays with all the mapped variables.
6259     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6260     MappableExprsHandler::MapValuesArrayTy Pointers;
6261     MappableExprsHandler::MapValuesArrayTy Sizes;
6262     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6263 
6264     // Get map clause information.
6265     MappableExprsHandler MCHandler(D, CGF);
6266     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6267 
6268     // Fill up the arrays and create the arguments.
6269     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6270 
6271     llvm::Value *BasePointersArrayArg = nullptr;
6272     llvm::Value *PointersArrayArg = nullptr;
6273     llvm::Value *SizesArrayArg = nullptr;
6274     llvm::Value *MapTypesArrayArg = nullptr;
6275     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6276                                  SizesArrayArg, MapTypesArrayArg, Info);
6277 
6278     // Emit device ID if any.
6279     llvm::Value *DeviceID = nullptr;
6280     if (Device)
6281       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6282                                            CGF.Int32Ty, /*isSigned=*/true);
6283     else
6284       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6285 
6286     // Emit the number of elements in the offloading arrays.
6287     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
6288 
6289     llvm::Value *OffloadingArgs[] = {
6290         DeviceID,         PointerNum,    BasePointersArrayArg,
6291         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6292     auto &RT = CGF.CGM.getOpenMPRuntime();
6293     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
6294                         OffloadingArgs);
6295 
6296     // If device pointer privatization is required, emit the body of the region
6297     // here. It will have to be duplicated: with and without privatization.
6298     if (!Info.CaptureDeviceAddrMap.empty())
6299       CodeGen(CGF);
6300   };
6301 
6302   // Generate code for the closing of the data region.
6303   auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
6304                                             PrePostActionTy &) {
6305     assert(Info.isValid() && "Invalid data environment closing arguments.");
6306 
6307     llvm::Value *BasePointersArrayArg = nullptr;
6308     llvm::Value *PointersArrayArg = nullptr;
6309     llvm::Value *SizesArrayArg = nullptr;
6310     llvm::Value *MapTypesArrayArg = nullptr;
6311     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6312                                  SizesArrayArg, MapTypesArrayArg, Info);
6313 
6314     // Emit device ID if any.
6315     llvm::Value *DeviceID = nullptr;
6316     if (Device)
6317       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6318                                            CGF.Int32Ty, /*isSigned=*/true);
6319     else
6320       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6321 
6322     // Emit the number of elements in the offloading arrays.
6323     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
6324 
6325     llvm::Value *OffloadingArgs[] = {
6326         DeviceID,         PointerNum,    BasePointersArrayArg,
6327         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6328     auto &RT = CGF.CGM.getOpenMPRuntime();
6329     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
6330                         OffloadingArgs);
6331   };
6332 
6333   // If we need device pointer privatization, we need to emit the body of the
6334   // region with no privatization in the 'else' branch of the conditional.
6335   // Otherwise, we don't have to do anything.
6336   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
6337                                                          PrePostActionTy &) {
6338     if (!Info.CaptureDeviceAddrMap.empty()) {
6339       CodeGen.setAction(NoPrivAction);
6340       CodeGen(CGF);
6341     }
6342   };
6343 
6344   // We don't have to do anything to close the region if the if clause evaluates
6345   // to false.
6346   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6347 
6348   if (IfCond) {
6349     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
6350   } else {
6351     RegionCodeGenTy RCG(BeginThenGen);
6352     RCG(CGF);
6353   }
6354 
6355   // If we don't require privatization of device pointers, we emit the body in
6356   // between the runtime calls. This avoids duplicating the body code.
6357   if (Info.CaptureDeviceAddrMap.empty()) {
6358     CodeGen.setAction(NoPrivAction);
6359     CodeGen(CGF);
6360   }
6361 
6362   if (IfCond) {
6363     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
6364   } else {
6365     RegionCodeGenTy RCG(EndThenGen);
6366     RCG(CGF);
6367   }
6368 }
6369 
6370 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
6371     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6372     const Expr *Device) {
6373   if (!CGF.HaveInsertPoint())
6374     return;
6375 
6376   assert((isa<OMPTargetEnterDataDirective>(D) ||
6377           isa<OMPTargetExitDataDirective>(D) ||
6378           isa<OMPTargetUpdateDirective>(D)) &&
6379          "Expecting either target enter, exit data, or update directives.");
6380 
6381   // Generate the code for the opening of the data environment.
6382   auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
6383     // Fill up the arrays with all the mapped variables.
6384     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6385     MappableExprsHandler::MapValuesArrayTy Pointers;
6386     MappableExprsHandler::MapValuesArrayTy Sizes;
6387     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6388 
6389     // Get map clause information.
6390     MappableExprsHandler MEHandler(D, CGF);
6391     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6392 
6393     // Fill up the arrays and create the arguments.
6394     TargetDataInfo Info;
6395     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6396     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
6397                                  Info.PointersArray, Info.SizesArray,
6398                                  Info.MapTypesArray, Info);
6399 
6400     // Emit device ID if any.
6401     llvm::Value *DeviceID = nullptr;
6402     if (Device)
6403       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6404                                            CGF.Int32Ty, /*isSigned=*/true);
6405     else
6406       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6407 
6408     // Emit the number of elements in the offloading arrays.
6409     auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6410 
6411     llvm::Value *OffloadingArgs[] = {
6412         DeviceID,           PointerNum,      Info.BasePointersArray,
6413         Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
6414 
6415     auto &RT = CGF.CGM.getOpenMPRuntime();
6416     // Select the right runtime function call for each expected standalone
6417     // directive.
6418     OpenMPRTLFunction RTLFn;
6419     switch (D.getDirectiveKind()) {
6420     default:
6421       llvm_unreachable("Unexpected standalone target data directive.");
6422       break;
6423     case OMPD_target_enter_data:
6424       RTLFn = OMPRTL__tgt_target_data_begin;
6425       break;
6426     case OMPD_target_exit_data:
6427       RTLFn = OMPRTL__tgt_target_data_end;
6428       break;
6429     case OMPD_target_update:
6430       RTLFn = OMPRTL__tgt_target_data_update;
6431       break;
6432     }
6433     CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
6434   };
6435 
6436   // In the event we get an if clause, we don't have to take any action on the
6437   // else side.
6438   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6439 
6440   if (IfCond) {
6441     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6442   } else {
6443     RegionCodeGenTy ThenGenRCG(ThenGen);
6444     ThenGenRCG(CGF);
6445   }
6446 }
6447 
6448 namespace {
6449   /// Kind of parameter in a function with 'declare simd' directive.
6450   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
6451   /// Attribute set of the parameter.
6452   struct ParamAttrTy {
6453     ParamKindTy Kind = Vector;
6454     llvm::APSInt StrideOrArg;
6455     llvm::APSInt Alignment;
6456   };
6457 } // namespace
6458 
6459 static unsigned evaluateCDTSize(const FunctionDecl *FD,
6460                                 ArrayRef<ParamAttrTy> ParamAttrs) {
6461   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
6462   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
6463   // of that clause. The VLEN value must be power of 2.
6464   // In other case the notion of the function`s "characteristic data type" (CDT)
6465   // is used to compute the vector length.
6466   // CDT is defined in the following order:
6467   //   a) For non-void function, the CDT is the return type.
6468   //   b) If the function has any non-uniform, non-linear parameters, then the
6469   //   CDT is the type of the first such parameter.
6470   //   c) If the CDT determined by a) or b) above is struct, union, or class
6471   //   type which is pass-by-value (except for the type that maps to the
6472   //   built-in complex data type), the characteristic data type is int.
6473   //   d) If none of the above three cases is applicable, the CDT is int.
6474   // The VLEN is then determined based on the CDT and the size of vector
6475   // register of that ISA for which current vector version is generated. The
6476   // VLEN is computed using the formula below:
6477   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
6478   // where vector register size specified in section 3.2.1 Registers and the
6479   // Stack Frame of original AMD64 ABI document.
6480   QualType RetType = FD->getReturnType();
6481   if (RetType.isNull())
6482     return 0;
6483   ASTContext &C = FD->getASTContext();
6484   QualType CDT;
6485   if (!RetType.isNull() && !RetType->isVoidType())
6486     CDT = RetType;
6487   else {
6488     unsigned Offset = 0;
6489     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
6490       if (ParamAttrs[Offset].Kind == Vector)
6491         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
6492       ++Offset;
6493     }
6494     if (CDT.isNull()) {
6495       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
6496         if (ParamAttrs[I + Offset].Kind == Vector) {
6497           CDT = FD->getParamDecl(I)->getType();
6498           break;
6499         }
6500       }
6501     }
6502   }
6503   if (CDT.isNull())
6504     CDT = C.IntTy;
6505   CDT = CDT->getCanonicalTypeUnqualified();
6506   if (CDT->isRecordType() || CDT->isUnionType())
6507     CDT = C.IntTy;
6508   return C.getTypeSize(CDT);
6509 }
6510 
6511 static void
6512 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
6513                            llvm::APSInt VLENVal,
6514                            ArrayRef<ParamAttrTy> ParamAttrs,
6515                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
6516   struct ISADataTy {
6517     char ISA;
6518     unsigned VecRegSize;
6519   };
6520   ISADataTy ISAData[] = {
6521       {
6522           'b', 128
6523       }, // SSE
6524       {
6525           'c', 256
6526       }, // AVX
6527       {
6528           'd', 256
6529       }, // AVX2
6530       {
6531           'e', 512
6532       }, // AVX512
6533   };
6534   llvm::SmallVector<char, 2> Masked;
6535   switch (State) {
6536   case OMPDeclareSimdDeclAttr::BS_Undefined:
6537     Masked.push_back('N');
6538     Masked.push_back('M');
6539     break;
6540   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
6541     Masked.push_back('N');
6542     break;
6543   case OMPDeclareSimdDeclAttr::BS_Inbranch:
6544     Masked.push_back('M');
6545     break;
6546   }
6547   for (auto Mask : Masked) {
6548     for (auto &Data : ISAData) {
6549       SmallString<256> Buffer;
6550       llvm::raw_svector_ostream Out(Buffer);
6551       Out << "_ZGV" << Data.ISA << Mask;
6552       if (!VLENVal) {
6553         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
6554                                          evaluateCDTSize(FD, ParamAttrs));
6555       } else
6556         Out << VLENVal;
6557       for (auto &ParamAttr : ParamAttrs) {
6558         switch (ParamAttr.Kind){
6559         case LinearWithVarStride:
6560           Out << 's' << ParamAttr.StrideOrArg;
6561           break;
6562         case Linear:
6563           Out << 'l';
6564           if (!!ParamAttr.StrideOrArg)
6565             Out << ParamAttr.StrideOrArg;
6566           break;
6567         case Uniform:
6568           Out << 'u';
6569           break;
6570         case Vector:
6571           Out << 'v';
6572           break;
6573         }
6574         if (!!ParamAttr.Alignment)
6575           Out << 'a' << ParamAttr.Alignment;
6576       }
6577       Out << '_' << Fn->getName();
6578       Fn->addFnAttr(Out.str());
6579     }
6580   }
6581 }
6582 
6583 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
6584                                               llvm::Function *Fn) {
6585   ASTContext &C = CGM.getContext();
6586   FD = FD->getCanonicalDecl();
6587   // Map params to their positions in function decl.
6588   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
6589   if (isa<CXXMethodDecl>(FD))
6590     ParamPositions.insert({FD, 0});
6591   unsigned ParamPos = ParamPositions.size();
6592   for (auto *P : FD->parameters()) {
6593     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
6594     ++ParamPos;
6595   }
6596   for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
6597     llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
6598     // Mark uniform parameters.
6599     for (auto *E : Attr->uniforms()) {
6600       E = E->IgnoreParenImpCasts();
6601       unsigned Pos;
6602       if (isa<CXXThisExpr>(E))
6603         Pos = ParamPositions[FD];
6604       else {
6605         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6606                         ->getCanonicalDecl();
6607         Pos = ParamPositions[PVD];
6608       }
6609       ParamAttrs[Pos].Kind = Uniform;
6610     }
6611     // Get alignment info.
6612     auto NI = Attr->alignments_begin();
6613     for (auto *E : Attr->aligneds()) {
6614       E = E->IgnoreParenImpCasts();
6615       unsigned Pos;
6616       QualType ParmTy;
6617       if (isa<CXXThisExpr>(E)) {
6618         Pos = ParamPositions[FD];
6619         ParmTy = E->getType();
6620       } else {
6621         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6622                         ->getCanonicalDecl();
6623         Pos = ParamPositions[PVD];
6624         ParmTy = PVD->getType();
6625       }
6626       ParamAttrs[Pos].Alignment =
6627           (*NI) ? (*NI)->EvaluateKnownConstInt(C)
6628                 : llvm::APSInt::getUnsigned(
6629                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
6630                           .getQuantity());
6631       ++NI;
6632     }
6633     // Mark linear parameters.
6634     auto SI = Attr->steps_begin();
6635     auto MI = Attr->modifiers_begin();
6636     for (auto *E : Attr->linears()) {
6637       E = E->IgnoreParenImpCasts();
6638       unsigned Pos;
6639       if (isa<CXXThisExpr>(E))
6640         Pos = ParamPositions[FD];
6641       else {
6642         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6643                         ->getCanonicalDecl();
6644         Pos = ParamPositions[PVD];
6645       }
6646       auto &ParamAttr = ParamAttrs[Pos];
6647       ParamAttr.Kind = Linear;
6648       if (*SI) {
6649         if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
6650                                   Expr::SE_AllowSideEffects)) {
6651           if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
6652             if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
6653               ParamAttr.Kind = LinearWithVarStride;
6654               ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
6655                   ParamPositions[StridePVD->getCanonicalDecl()]);
6656             }
6657           }
6658         }
6659       }
6660       ++SI;
6661       ++MI;
6662     }
6663     llvm::APSInt VLENVal;
6664     if (const Expr *VLEN = Attr->getSimdlen())
6665       VLENVal = VLEN->EvaluateKnownConstInt(C);
6666     OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
6667     if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
6668         CGM.getTriple().getArch() == llvm::Triple::x86_64)
6669       emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
6670   }
6671 }
6672 
6673 namespace {
6674 /// Cleanup action for doacross support.
6675 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
6676 public:
6677   static const int DoacrossFinArgs = 2;
6678 
6679 private:
6680   llvm::Value *RTLFn;
6681   llvm::Value *Args[DoacrossFinArgs];
6682 
6683 public:
6684   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
6685       : RTLFn(RTLFn) {
6686     assert(CallArgs.size() == DoacrossFinArgs);
6687     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
6688   }
6689   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
6690     if (!CGF.HaveInsertPoint())
6691       return;
6692     CGF.EmitRuntimeCall(RTLFn, Args);
6693   }
6694 };
6695 } // namespace
6696 
6697 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
6698                                        const OMPLoopDirective &D) {
6699   if (!CGF.HaveInsertPoint())
6700     return;
6701 
6702   ASTContext &C = CGM.getContext();
6703   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
6704   RecordDecl *RD;
6705   if (KmpDimTy.isNull()) {
6706     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
6707     //  kmp_int64 lo; // lower
6708     //  kmp_int64 up; // upper
6709     //  kmp_int64 st; // stride
6710     // };
6711     RD = C.buildImplicitRecord("kmp_dim");
6712     RD->startDefinition();
6713     addFieldToRecordDecl(C, RD, Int64Ty);
6714     addFieldToRecordDecl(C, RD, Int64Ty);
6715     addFieldToRecordDecl(C, RD, Int64Ty);
6716     RD->completeDefinition();
6717     KmpDimTy = C.getRecordType(RD);
6718   } else
6719     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
6720 
6721   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
6722   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
6723   enum { LowerFD = 0, UpperFD, StrideFD };
6724   // Fill dims with data.
6725   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
6726   // dims.upper = num_iterations;
6727   LValue UpperLVal =
6728       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
6729   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
6730       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
6731       Int64Ty, D.getNumIterations()->getExprLoc());
6732   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
6733   // dims.stride = 1;
6734   LValue StrideLVal =
6735       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
6736   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
6737                         StrideLVal);
6738 
6739   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
6740   // kmp_int32 num_dims, struct kmp_dim * dims);
6741   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
6742                          getThreadID(CGF, D.getLocStart()),
6743                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
6744                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6745                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
6746 
6747   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
6748   CGF.EmitRuntimeCall(RTLFn, Args);
6749   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
6750       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
6751   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
6752   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
6753                                              llvm::makeArrayRef(FiniArgs));
6754 }
6755 
6756 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
6757                                           const OMPDependClause *C) {
6758   QualType Int64Ty =
6759       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
6760   const Expr *CounterVal = C->getCounterValue();
6761   assert(CounterVal);
6762   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
6763                                                  CounterVal->getType(), Int64Ty,
6764                                                  CounterVal->getExprLoc());
6765   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
6766   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
6767   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
6768                          getThreadID(CGF, C->getLocStart()),
6769                          CntAddr.getPointer()};
6770   llvm::Value *RTLFn;
6771   if (C->getDependencyKind() == OMPC_DEPEND_source)
6772     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
6773   else {
6774     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
6775     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
6776   }
6777   CGF.EmitRuntimeCall(RTLFn, Args);
6778 }
6779 
6780