1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 
30 using namespace clang;
31 using namespace CodeGen;
32 
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50 
51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57 
58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63 
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67 
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70 
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74 
75   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
76 
77   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
78 
79   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
80 
81   bool hasCancel() const { return HasCancel; }
82 
83   static bool classof(const CGCapturedStmtInfo *Info) {
84     return Info->getKind() == CR_OpenMP;
85   }
86 
87   ~CGOpenMPRegionInfo() override = default;
88 
89 protected:
90   CGOpenMPRegionKind RegionKind;
91   RegionCodeGenTy CodeGen;
92   OpenMPDirectiveKind Kind;
93   bool HasCancel;
94 };
95 
96 /// \brief API for captured statement code generation in OpenMP constructs.
97 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
98 public:
99   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
100                              const RegionCodeGenTy &CodeGen,
101                              OpenMPDirectiveKind Kind, bool HasCancel)
102       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
103                            HasCancel),
104         ThreadIDVar(ThreadIDVar) {
105     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
106   }
107 
108   /// \brief Get a variable or parameter for storing global thread id
109   /// inside OpenMP construct.
110   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
111 
112   /// \brief Get the name of the capture helper.
113   StringRef getHelperName() const override { return ".omp_outlined."; }
114 
115   static bool classof(const CGCapturedStmtInfo *Info) {
116     return CGOpenMPRegionInfo::classof(Info) &&
117            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
118                ParallelOutlinedRegion;
119   }
120 
121 private:
122   /// \brief A variable or parameter storing global thread id for OpenMP
123   /// constructs.
124   const VarDecl *ThreadIDVar;
125 };
126 
127 /// \brief API for captured statement code generation in OpenMP constructs.
128 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
129 public:
130   class UntiedTaskActionTy final : public PrePostActionTy {
131     bool Untied;
132     const VarDecl *PartIDVar;
133     const RegionCodeGenTy UntiedCodeGen;
134     llvm::SwitchInst *UntiedSwitch = nullptr;
135 
136   public:
137     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
138                        const RegionCodeGenTy &UntiedCodeGen)
139         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
140     void Enter(CodeGenFunction &CGF) override {
141       if (Untied) {
142         // Emit task switching point.
143         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
144             CGF.GetAddrOfLocalVar(PartIDVar),
145             PartIDVar->getType()->castAs<PointerType>());
146         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
147         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
148         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
149         CGF.EmitBlock(DoneBB);
150         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
151         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
152         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
153                               CGF.Builder.GetInsertBlock());
154         emitUntiedSwitch(CGF);
155       }
156     }
157     void emitUntiedSwitch(CodeGenFunction &CGF) const {
158       if (Untied) {
159         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
163                               PartIdLVal);
164         UntiedCodeGen(CGF);
165         CodeGenFunction::JumpDest CurPoint =
166             CGF.getJumpDestInCurrentScope(".untied.next.");
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
170                               CGF.Builder.GetInsertBlock());
171         CGF.EmitBranchThroughCleanup(CurPoint);
172         CGF.EmitBlock(CurPoint.getBlock());
173       }
174     }
175     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
176   };
177   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
178                                  const VarDecl *ThreadIDVar,
179                                  const RegionCodeGenTy &CodeGen,
180                                  OpenMPDirectiveKind Kind, bool HasCancel,
181                                  const UntiedTaskActionTy &Action)
182       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
183         ThreadIDVar(ThreadIDVar), Action(Action) {
184     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
185   }
186 
187   /// \brief Get a variable or parameter for storing global thread id
188   /// inside OpenMP construct.
189   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
190 
191   /// \brief Get an LValue for the current ThreadID variable.
192   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
193 
194   /// \brief Get the name of the capture helper.
195   StringRef getHelperName() const override { return ".omp_outlined."; }
196 
197   void emitUntiedSwitch(CodeGenFunction &CGF) override {
198     Action.emitUntiedSwitch(CGF);
199   }
200 
201   static bool classof(const CGCapturedStmtInfo *Info) {
202     return CGOpenMPRegionInfo::classof(Info) &&
203            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
204                TaskOutlinedRegion;
205   }
206 
207 private:
208   /// \brief A variable or parameter storing global thread id for OpenMP
209   /// constructs.
210   const VarDecl *ThreadIDVar;
211   /// Action for emitting code for untied tasks.
212   const UntiedTaskActionTy &Action;
213 };
214 
215 /// \brief API for inlined captured statement code generation in OpenMP
216 /// constructs.
217 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
218 public:
219   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
220                             const RegionCodeGenTy &CodeGen,
221                             OpenMPDirectiveKind Kind, bool HasCancel)
222       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
223         OldCSI(OldCSI),
224         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
225 
226   // \brief Retrieve the value of the context parameter.
227   llvm::Value *getContextValue() const override {
228     if (OuterRegionInfo)
229       return OuterRegionInfo->getContextValue();
230     llvm_unreachable("No context value for inlined OpenMP region");
231   }
232 
233   void setContextValue(llvm::Value *V) override {
234     if (OuterRegionInfo) {
235       OuterRegionInfo->setContextValue(V);
236       return;
237     }
238     llvm_unreachable("No context value for inlined OpenMP region");
239   }
240 
241   /// \brief Lookup the captured field decl for a variable.
242   const FieldDecl *lookup(const VarDecl *VD) const override {
243     if (OuterRegionInfo)
244       return OuterRegionInfo->lookup(VD);
245     // If there is no outer outlined region,no need to lookup in a list of
246     // captured variables, we can use the original one.
247     return nullptr;
248   }
249 
250   FieldDecl *getThisFieldDecl() const override {
251     if (OuterRegionInfo)
252       return OuterRegionInfo->getThisFieldDecl();
253     return nullptr;
254   }
255 
256   /// \brief Get a variable or parameter for storing global thread id
257   /// inside OpenMP construct.
258   const VarDecl *getThreadIDVariable() const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->getThreadIDVariable();
261     return nullptr;
262   }
263 
264   /// \brief Get the name of the capture helper.
265   StringRef getHelperName() const override {
266     if (auto *OuterRegionInfo = getOldCSI())
267       return OuterRegionInfo->getHelperName();
268     llvm_unreachable("No helper name for inlined OpenMP construct");
269   }
270 
271   void emitUntiedSwitch(CodeGenFunction &CGF) override {
272     if (OuterRegionInfo)
273       OuterRegionInfo->emitUntiedSwitch(CGF);
274   }
275 
276   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
277 
278   static bool classof(const CGCapturedStmtInfo *Info) {
279     return CGOpenMPRegionInfo::classof(Info) &&
280            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
281   }
282 
283   ~CGOpenMPInlinedRegionInfo() override = default;
284 
285 private:
286   /// \brief CodeGen info about outer OpenMP region.
287   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
288   CGOpenMPRegionInfo *OuterRegionInfo;
289 };
290 
291 /// \brief API for captured statement code generation in OpenMP target
292 /// constructs. For this captures, implicit parameters are used instead of the
293 /// captured fields. The name of the target region has to be unique in a given
294 /// application so it is provided by the client, because only the client has
295 /// the information to generate that.
296 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
297 public:
298   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
299                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
300       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
301                            /*HasCancel=*/false),
302         HelperName(HelperName) {}
303 
304   /// \brief This is unused for target regions because each starts executing
305   /// with a single thread.
306   const VarDecl *getThreadIDVariable() const override { return nullptr; }
307 
308   /// \brief Get the name of the capture helper.
309   StringRef getHelperName() const override { return HelperName; }
310 
311   static bool classof(const CGCapturedStmtInfo *Info) {
312     return CGOpenMPRegionInfo::classof(Info) &&
313            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
314   }
315 
316 private:
317   StringRef HelperName;
318 };
319 
320 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
321   llvm_unreachable("No codegen for expressions");
322 }
323 /// \brief API for generation of expressions captured in a innermost OpenMP
324 /// region.
325 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
326 public:
327   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
328       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
329                                   OMPD_unknown,
330                                   /*HasCancel=*/false),
331         PrivScope(CGF) {
332     // Make sure the globals captured in the provided statement are local by
333     // using the privatization logic. We assume the same variable is not
334     // captured more than once.
335     for (auto &C : CS.captures()) {
336       if (!C.capturesVariable() && !C.capturesVariableByCopy())
337         continue;
338 
339       const VarDecl *VD = C.getCapturedVar();
340       if (VD->isLocalVarDeclOrParm())
341         continue;
342 
343       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
344                       /*RefersToEnclosingVariableOrCapture=*/false,
345                       VD->getType().getNonReferenceType(), VK_LValue,
346                       SourceLocation());
347       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
348         return CGF.EmitLValue(&DRE).getAddress();
349       });
350     }
351     (void)PrivScope.Privatize();
352   }
353 
354   /// \brief Lookup the captured field decl for a variable.
355   const FieldDecl *lookup(const VarDecl *VD) const override {
356     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
357       return FD;
358     return nullptr;
359   }
360 
361   /// \brief Emit the captured statement body.
362   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
363     llvm_unreachable("No body for expressions");
364   }
365 
366   /// \brief Get a variable or parameter for storing global thread id
367   /// inside OpenMP construct.
368   const VarDecl *getThreadIDVariable() const override {
369     llvm_unreachable("No thread id for expressions");
370   }
371 
372   /// \brief Get the name of the capture helper.
373   StringRef getHelperName() const override {
374     llvm_unreachable("No helper name for expressions");
375   }
376 
377   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
378 
379 private:
380   /// Private scope to capture global variables.
381   CodeGenFunction::OMPPrivateScope PrivScope;
382 };
383 
384 /// \brief RAII for emitting code of OpenMP constructs.
385 class InlinedOpenMPRegionRAII {
386   CodeGenFunction &CGF;
387   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
388   FieldDecl *LambdaThisCaptureField = nullptr;
389 
390 public:
391   /// \brief Constructs region for combined constructs.
392   /// \param CodeGen Code generation sequence for combined directives. Includes
393   /// a list of functions used for code generation of implicitly inlined
394   /// regions.
395   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
396                           OpenMPDirectiveKind Kind, bool HasCancel)
397       : CGF(CGF) {
398     // Start emission for the construct.
399     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
400         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
401     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
402     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
403     CGF.LambdaThisCaptureField = nullptr;
404   }
405 
406   ~InlinedOpenMPRegionRAII() {
407     // Restore original CapturedStmtInfo only if we're done with code emission.
408     auto *OldCSI =
409         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
410     delete CGF.CapturedStmtInfo;
411     CGF.CapturedStmtInfo = OldCSI;
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
414   }
415 };
416 
/// \brief Bit flags stored in the flags field of ident_t.
/// All enumerators are named and described in accordance with the code from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive. Has the same value as
  /// OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};
438 
/// \brief Indices of the fields of the ident structure, which describes a
/// source location. All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief Field reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// \brief Field flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this
  /// union member.
  IdentField_Flags = 1,
  /// \brief Field reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// \brief Field reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// \brief Field psource: string describing the source location. The string
  /// is composed of semi-colon separated fields which describe the source
  /// file, the function and a pair of line numbers that delimit the
  /// construct.
  IdentField_PSource = 4
};
479 
/// \brief Schedule types for 'omp for' loops. The enumerators are taken from
/// the enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,

  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// \brief Default schedule: an alias of the unchunked static schedule.
  OMP_sch_default = OMP_sch_static,

  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present (bit 29).
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present (bit 30).
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
509 
/// \brief Identifiers of the runtime library entry points referenced in this
/// file. Each enumerator is documented with the prototype of the function it
/// stands for.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// \brief Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// \brief Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// \brief Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// \brief Call to void __kmpc_end_critical(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// \brief Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// \brief Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// \brief Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// \brief Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// \brief Call to void __kmpc_end_serialized_parallel(ident_t *loc,
  /// kmp_int32 global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// \brief Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// \brief Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// \brief Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// \brief Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// \brief Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32
  /// global_tid, int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// \brief Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// \brief Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// \brief Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32
  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// \brief Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task,
  /// \brief Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32
  /// global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void *,
  /// void *), kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// \brief Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce,
  /// \brief Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// \brief Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// \brief Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// \brief Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// \brief Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32
  /// gtid, kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// \brief Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// \brief Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_ordered,
  /// \brief Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// \brief Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_taskgroup,
  /// \brief Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// \brief Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
  /// global_tid, int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// \brief Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref,
  /// kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps,
  /// kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t
  /// *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// \brief Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// \brief Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc,
  /// kmp_int32 global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// \brief Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// \brief Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// \brief Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t
  /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int
  /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,

  //
  // Offloading related calls
  //
  /// \brief Call to int32_t __tgt_target(int32_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  /// int32_t *arg_types);
  OMPRTL__tgt_target,
  /// \brief Call to int32_t __tgt_target_teams(int32_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  /// *arg_sizes, int32_t *arg_types, int32_t num_teams, int32_t
  /// thread_limit);
  OMPRTL__tgt_target_teams,
  /// \brief Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// \brief Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// \brief Call to void __tgt_target_data_begin(int32_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// \brief Call to void __tgt_target_data_end(int32_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end,
};
653 
654 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
655 /// region.
656 class CleanupTy final : public EHScopeStack::Cleanup {
657   PrePostActionTy *Action;
658 
659 public:
660   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
661   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
662     if (!CGF.HaveInsertPoint())
663       return;
664     Action->Exit(CGF);
665   }
666 };
667 
668 } // anonymous namespace
669 
670 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
671   CodeGenFunction::RunCleanupsScope Scope(CGF);
672   if (PrePostAction) {
673     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
674     Callback(CodeGen, CGF, *PrePostAction);
675   } else {
676     PrePostActionTy Action;
677     Callback(CodeGen, CGF, Action);
678   }
679 }
680 
681 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
682   return CGF.EmitLoadOfPointerLValue(
683       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
684       getThreadIDVariable()->getType()->castAs<PointerType>());
685 }
686 
687 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
688   if (!CGF.HaveInsertPoint())
689     return;
690   // 1.2.2 OpenMP Language Terminology
691   // Structured block - An executable statement with a single entry at the
692   // top and a single exit at the bottom.
693   // The point of exit cannot be a branch out of the structured block.
694   // longjmp() and throw() must not violate the entry/exit criteria.
695   CGF.EHStack.pushTerminate();
696   CodeGen(CGF);
697   CGF.EHStack.popTerminate();
698 }
699 
700 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
701     CodeGenFunction &CGF) {
702   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
703                             getThreadIDVariable()->getType(),
704                             AlignmentSource::Decl);
705 }
706 
707 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
708     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
709   IdentTy = llvm::StructType::create(
710       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
711       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
712       CGM.Int8PtrTy /* psource */, nullptr);
713   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
714 
715   loadOffloadInfoMetadata();
716 }
717 
718 void CGOpenMPRuntime::clear() {
719   InternalVars.clear();
720 }
721 
722 static llvm::Function *
723 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
724                           const Expr *CombinerInitializer, const VarDecl *In,
725                           const VarDecl *Out, bool IsCombiner) {
726   // void .omp_combiner.(Ty *in, Ty *out);
727   auto &C = CGM.getContext();
728   QualType PtrTy = C.getPointerType(Ty).withRestrict();
729   FunctionArgList Args;
730   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
731                                /*Id=*/nullptr, PtrTy);
732   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
733                               /*Id=*/nullptr, PtrTy);
734   Args.push_back(&OmpOutParm);
735   Args.push_back(&OmpInParm);
736   auto &FnInfo =
737       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
738   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
739   auto *Fn = llvm::Function::Create(
740       FnTy, llvm::GlobalValue::InternalLinkage,
741       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
742   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
743   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
744   CodeGenFunction CGF(CGM);
745   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
746   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
747   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
748   CodeGenFunction::OMPPrivateScope Scope(CGF);
749   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
750   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
751     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
752         .getAddress();
753   });
754   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
755   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
756     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
757         .getAddress();
758   });
759   (void)Scope.Privatize();
760   CGF.EmitIgnoredExpr(CombinerInitializer);
761   Scope.ForceCleanup();
762   CGF.FinishFunction();
763   return Fn;
764 }
765 
766 void CGOpenMPRuntime::emitUserDefinedReduction(
767     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
768   if (UDRMap.count(D) > 0)
769     return;
770   auto &C = CGM.getContext();
771   if (!In || !Out) {
772     In = &C.Idents.get("omp_in");
773     Out = &C.Idents.get("omp_out");
774   }
775   llvm::Function *Combiner = emitCombinerOrInitializer(
776       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
777       cast<VarDecl>(D->lookup(Out).front()),
778       /*IsCombiner=*/true);
779   llvm::Function *Initializer = nullptr;
780   if (auto *Init = D->getInitializer()) {
781     if (!Priv || !Orig) {
782       Priv = &C.Idents.get("omp_priv");
783       Orig = &C.Idents.get("omp_orig");
784     }
785     Initializer = emitCombinerOrInitializer(
786         CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
787         cast<VarDecl>(D->lookup(Priv).front()),
788         /*IsCombiner=*/false);
789   }
790   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
791   if (CGF) {
792     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
793     Decls.second.push_back(D);
794   }
795 }
796 
797 std::pair<llvm::Function *, llvm::Function *>
798 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
799   auto I = UDRMap.find(D);
800   if (I != UDRMap.end())
801     return I->second;
802   emitUserDefinedReduction(/*CGF=*/nullptr, D);
803   return UDRMap.lookup(D);
804 }
805 
806 // Layout information for ident_t.
807 static CharUnits getIdentAlign(CodeGenModule &CGM) {
808   return CGM.getPointerAlign();
809 }
810 static CharUnits getIdentSize(CodeGenModule &CGM) {
811   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
812   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
813 }
814 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
815   // All the fields except the last are i32, so this works beautifully.
816   return unsigned(Field) * CharUnits::fromQuantity(4);
817 }
818 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
819                                    IdentFieldIndex Field,
820                                    const llvm::Twine &Name = "") {
821   auto Offset = getOffsetOfIdentField(Field);
822   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
823 }
824 
825 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
826     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
827     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
828   assert(ThreadIDVar->getType()->isPointerType() &&
829          "thread id variable must be of type kmp_int32 *");
830   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
831   CodeGenFunction CGF(CGM, true);
832   bool HasCancel = false;
833   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
834     HasCancel = OPD->hasCancel();
835   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
836     HasCancel = OPSD->hasCancel();
837   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
838     HasCancel = OPFD->hasCancel();
839   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
840                                     HasCancel);
841   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
842   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
843 }
844 
845 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
846     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
847     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
848     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
849     bool Tied, unsigned &NumberOfParts) {
850   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
851                                               PrePostActionTy &) {
852     auto *ThreadID = getThreadID(CGF, D.getLocStart());
853     auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
854     llvm::Value *TaskArgs[] = {
855         UpLoc, ThreadID,
856         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
857                                     TaskTVar->getType()->castAs<PointerType>())
858             .getPointer()};
859     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
860   };
861   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
862                                                             UntiedCodeGen);
863   CodeGen.setAction(Action);
864   assert(!ThreadIDVar->getType()->isPointerType() &&
865          "thread id variable must be of type kmp_int32 for tasks");
866   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
867   auto *TD = dyn_cast<OMPTaskDirective>(&D);
868   CodeGenFunction CGF(CGM, true);
869   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
870                                         InnermostKind,
871                                         TD ? TD->hasCancel() : false, Action);
872   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
873   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
874   if (!Tied)
875     NumberOfParts = Action.getNumberOfParts();
876   return Res;
877 }
878 
879 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
880   CharUnits Align = getIdentAlign(CGM);
881   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
882   if (!Entry) {
883     if (!DefaultOpenMPPSource) {
884       // Initialize default location for psource field of ident_t structure of
885       // all ident_t objects. Format is ";file;function;line;column;;".
886       // Taken from
887       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
888       DefaultOpenMPPSource =
889           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
890       DefaultOpenMPPSource =
891           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
892     }
893     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
894         CGM.getModule(), IdentTy, /*isConstant*/ true,
895         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
896     DefaultOpenMPLocation->setUnnamedAddr(true);
897     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
898 
899     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
900     llvm::Constant *Values[] = {Zero,
901                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
902                                 Zero, Zero, DefaultOpenMPPSource};
903     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
904     DefaultOpenMPLocation->setInitializer(Init);
905     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
906   }
907   return Address(Entry, Align);
908 }
909 
910 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
911                                                  SourceLocation Loc,
912                                                  unsigned Flags) {
913   Flags |= OMP_IDENT_KMPC;
914   // If no debug info is generated - return global default location.
915   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
916       Loc.isInvalid())
917     return getOrCreateDefaultLocation(Flags).getPointer();
918 
919   assert(CGF.CurFn && "No function in current CodeGenFunction.");
920 
921   Address LocValue = Address::invalid();
922   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
923   if (I != OpenMPLocThreadIDMap.end())
924     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
925 
926   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
927   // GetOpenMPThreadID was called before this routine.
928   if (!LocValue.isValid()) {
929     // Generate "ident_t .kmpc_loc.addr;"
930     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
931                                       ".kmpc_loc.addr");
932     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
933     Elem.second.DebugLoc = AI.getPointer();
934     LocValue = AI;
935 
936     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
937     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
938     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
939                              CGM.getSize(getIdentSize(CGF.CGM)));
940   }
941 
942   // char **psource = &.kmpc_loc_<flags>.addr.psource;
943   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
944 
945   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
946   if (OMPDebugLoc == nullptr) {
947     SmallString<128> Buffer2;
948     llvm::raw_svector_ostream OS2(Buffer2);
949     // Build debug location
950     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
951     OS2 << ";" << PLoc.getFilename() << ";";
952     if (const FunctionDecl *FD =
953             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
954       OS2 << FD->getQualifiedNameAsString();
955     }
956     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
957     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
958     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
959   }
960   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
961   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
962 
963   // Our callers always pass this to a runtime function, so for
964   // convenience, go ahead and return a naked pointer.
965   return LocValue.getPointer();
966 }
967 
968 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
969                                           SourceLocation Loc) {
970   assert(CGF.CurFn && "No function in current CodeGenFunction.");
971 
972   llvm::Value *ThreadID = nullptr;
973   // Check whether we've already cached a load of the thread id in this
974   // function.
975   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
976   if (I != OpenMPLocThreadIDMap.end()) {
977     ThreadID = I->second.ThreadID;
978     if (ThreadID != nullptr)
979       return ThreadID;
980   }
981   if (auto *OMPRegionInfo =
982           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
983     if (OMPRegionInfo->getThreadIDVariable()) {
984       // Check if this an outlined function with thread id passed as argument.
985       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
986       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
987       // If value loaded in entry block, cache it and use it everywhere in
988       // function.
989       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
990         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
991         Elem.second.ThreadID = ThreadID;
992       }
993       return ThreadID;
994     }
995   }
996 
997   // This is not an outlined function region - need to call __kmpc_int32
998   // kmpc_global_thread_num(ident_t *loc).
999   // Generate thread id value and cache this value for use across the
1000   // function.
1001   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1002   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1003   ThreadID =
1004       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1005                           emitUpdateLocation(CGF, Loc));
1006   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1007   Elem.second.ThreadID = ThreadID;
1008   return ThreadID;
1009 }
1010 
1011 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1012   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1013   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1014     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1015   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1016     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1017       UDRMap.erase(D);
1018     }
1019     FunctionUDRMap.erase(CGF.CurFn);
1020   }
1021 }
1022 
1023 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1024   if (!IdentTy) {
1025   }
1026   return llvm::PointerType::getUnqual(IdentTy);
1027 }
1028 
1029 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1030   if (!Kmpc_MicroTy) {
1031     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1032     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1033                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1034     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1035   }
1036   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1037 }
1038 
1039 llvm::Constant *
1040 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1041   llvm::Constant *RTLFn = nullptr;
1042   switch (static_cast<OpenMPRTLFunction>(Function)) {
1043   case OMPRTL__kmpc_fork_call: {
1044     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1045     // microtask, ...);
1046     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1047                                 getKmpc_MicroPointerTy()};
1048     llvm::FunctionType *FnTy =
1049         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1050     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1051     break;
1052   }
1053   case OMPRTL__kmpc_global_thread_num: {
1054     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1055     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1056     llvm::FunctionType *FnTy =
1057         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1058     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1059     break;
1060   }
1061   case OMPRTL__kmpc_threadprivate_cached: {
1062     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1063     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1064     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1065                                 CGM.VoidPtrTy, CGM.SizeTy,
1066                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1067     llvm::FunctionType *FnTy =
1068         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1069     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1070     break;
1071   }
1072   case OMPRTL__kmpc_critical: {
1073     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1074     // kmp_critical_name *crit);
1075     llvm::Type *TypeParams[] = {
1076         getIdentTyPointerTy(), CGM.Int32Ty,
1077         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1078     llvm::FunctionType *FnTy =
1079         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1080     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1081     break;
1082   }
1083   case OMPRTL__kmpc_critical_with_hint: {
1084     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1085     // kmp_critical_name *crit, uintptr_t hint);
1086     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1087                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1088                                 CGM.IntPtrTy};
1089     llvm::FunctionType *FnTy =
1090         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1091     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1092     break;
1093   }
1094   case OMPRTL__kmpc_threadprivate_register: {
1095     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1096     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1097     // typedef void *(*kmpc_ctor)(void *);
1098     auto KmpcCtorTy =
1099         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1100                                 /*isVarArg*/ false)->getPointerTo();
1101     // typedef void *(*kmpc_cctor)(void *, void *);
1102     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1103     auto KmpcCopyCtorTy =
1104         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1105                                 /*isVarArg*/ false)->getPointerTo();
1106     // typedef void (*kmpc_dtor)(void *);
1107     auto KmpcDtorTy =
1108         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1109             ->getPointerTo();
1110     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1111                               KmpcCopyCtorTy, KmpcDtorTy};
1112     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1113                                         /*isVarArg*/ false);
1114     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1115     break;
1116   }
1117   case OMPRTL__kmpc_end_critical: {
1118     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1119     // kmp_critical_name *crit);
1120     llvm::Type *TypeParams[] = {
1121         getIdentTyPointerTy(), CGM.Int32Ty,
1122         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1123     llvm::FunctionType *FnTy =
1124         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1125     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1126     break;
1127   }
1128   case OMPRTL__kmpc_cancel_barrier: {
1129     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1130     // global_tid);
1131     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1132     llvm::FunctionType *FnTy =
1133         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1134     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1135     break;
1136   }
1137   case OMPRTL__kmpc_barrier: {
1138     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1139     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1140     llvm::FunctionType *FnTy =
1141         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1142     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1143     break;
1144   }
1145   case OMPRTL__kmpc_for_static_fini: {
1146     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1147     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1148     llvm::FunctionType *FnTy =
1149         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1150     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1151     break;
1152   }
1153   case OMPRTL__kmpc_push_num_threads: {
1154     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1155     // kmp_int32 num_threads)
1156     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1157                                 CGM.Int32Ty};
1158     llvm::FunctionType *FnTy =
1159         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1160     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1161     break;
1162   }
1163   case OMPRTL__kmpc_serialized_parallel: {
1164     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1165     // global_tid);
1166     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1167     llvm::FunctionType *FnTy =
1168         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1169     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1170     break;
1171   }
1172   case OMPRTL__kmpc_end_serialized_parallel: {
1173     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1174     // global_tid);
1175     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1176     llvm::FunctionType *FnTy =
1177         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1178     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1179     break;
1180   }
1181   case OMPRTL__kmpc_flush: {
1182     // Build void __kmpc_flush(ident_t *loc);
1183     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1184     llvm::FunctionType *FnTy =
1185         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1186     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1187     break;
1188   }
1189   case OMPRTL__kmpc_master: {
1190     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1191     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1192     llvm::FunctionType *FnTy =
1193         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1194     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1195     break;
1196   }
1197   case OMPRTL__kmpc_end_master: {
1198     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1199     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1200     llvm::FunctionType *FnTy =
1201         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1202     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1203     break;
1204   }
1205   case OMPRTL__kmpc_omp_taskyield: {
1206     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1207     // int end_part);
1208     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1209     llvm::FunctionType *FnTy =
1210         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1211     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1212     break;
1213   }
1214   case OMPRTL__kmpc_single: {
1215     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1216     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1217     llvm::FunctionType *FnTy =
1218         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1219     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1220     break;
1221   }
1222   case OMPRTL__kmpc_end_single: {
1223     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1224     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1225     llvm::FunctionType *FnTy =
1226         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1227     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1228     break;
1229   }
1230   case OMPRTL__kmpc_omp_task_alloc: {
1231     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1232     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1233     // kmp_routine_entry_t *task_entry);
1234     assert(KmpRoutineEntryPtrTy != nullptr &&
1235            "Type kmp_routine_entry_t must be created.");
1236     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1237                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1238     // Return void * and then cast to particular kmp_task_t type.
1239     llvm::FunctionType *FnTy =
1240         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1241     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1242     break;
1243   }
1244   case OMPRTL__kmpc_omp_task: {
1245     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1246     // *new_task);
1247     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1248                                 CGM.VoidPtrTy};
1249     llvm::FunctionType *FnTy =
1250         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1251     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1252     break;
1253   }
1254   case OMPRTL__kmpc_copyprivate: {
1255     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1256     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1257     // kmp_int32 didit);
1258     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1259     auto *CpyFnTy =
1260         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1261     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1262                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1263                                 CGM.Int32Ty};
1264     llvm::FunctionType *FnTy =
1265         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1266     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1267     break;
1268   }
1269   case OMPRTL__kmpc_reduce: {
1270     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1271     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1272     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1273     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1274     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1275                                                /*isVarArg=*/false);
1276     llvm::Type *TypeParams[] = {
1277         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1278         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1279         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1280     llvm::FunctionType *FnTy =
1281         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1282     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1283     break;
1284   }
1285   case OMPRTL__kmpc_reduce_nowait: {
1286     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1287     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1288     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1289     // *lck);
1290     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1291     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1292                                                /*isVarArg=*/false);
1293     llvm::Type *TypeParams[] = {
1294         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1295         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1296         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1297     llvm::FunctionType *FnTy =
1298         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1299     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1300     break;
1301   }
1302   case OMPRTL__kmpc_end_reduce: {
1303     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1304     // kmp_critical_name *lck);
1305     llvm::Type *TypeParams[] = {
1306         getIdentTyPointerTy(), CGM.Int32Ty,
1307         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1308     llvm::FunctionType *FnTy =
1309         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1310     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1311     break;
1312   }
1313   case OMPRTL__kmpc_end_reduce_nowait: {
1314     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1315     // kmp_critical_name *lck);
1316     llvm::Type *TypeParams[] = {
1317         getIdentTyPointerTy(), CGM.Int32Ty,
1318         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1319     llvm::FunctionType *FnTy =
1320         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1321     RTLFn =
1322         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1323     break;
1324   }
1325   case OMPRTL__kmpc_omp_task_begin_if0: {
1326     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1327     // *new_task);
1328     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1329                                 CGM.VoidPtrTy};
1330     llvm::FunctionType *FnTy =
1331         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1332     RTLFn =
1333         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1334     break;
1335   }
1336   case OMPRTL__kmpc_omp_task_complete_if0: {
1337     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1338     // *new_task);
1339     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1340                                 CGM.VoidPtrTy};
1341     llvm::FunctionType *FnTy =
1342         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1343     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1344                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1345     break;
1346   }
1347   case OMPRTL__kmpc_ordered: {
1348     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1349     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1350     llvm::FunctionType *FnTy =
1351         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1352     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1353     break;
1354   }
1355   case OMPRTL__kmpc_end_ordered: {
1356     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1357     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1358     llvm::FunctionType *FnTy =
1359         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1360     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1361     break;
1362   }
1363   case OMPRTL__kmpc_omp_taskwait: {
1364     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1365     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1366     llvm::FunctionType *FnTy =
1367         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1368     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1369     break;
1370   }
1371   case OMPRTL__kmpc_taskgroup: {
1372     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1373     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1374     llvm::FunctionType *FnTy =
1375         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1376     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1377     break;
1378   }
1379   case OMPRTL__kmpc_end_taskgroup: {
1380     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1381     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1382     llvm::FunctionType *FnTy =
1383         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1384     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1385     break;
1386   }
1387   case OMPRTL__kmpc_push_proc_bind: {
1388     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1389     // int proc_bind)
1390     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1391     llvm::FunctionType *FnTy =
1392         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1393     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1394     break;
1395   }
1396   case OMPRTL__kmpc_omp_task_with_deps: {
1397     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1398     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1399     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1400     llvm::Type *TypeParams[] = {
1401         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1402         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1403     llvm::FunctionType *FnTy =
1404         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1405     RTLFn =
1406         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1407     break;
1408   }
1409   case OMPRTL__kmpc_omp_wait_deps: {
1410     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1411     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1412     // kmp_depend_info_t *noalias_dep_list);
1413     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1414                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1415                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1416     llvm::FunctionType *FnTy =
1417         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1418     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1419     break;
1420   }
1421   case OMPRTL__kmpc_cancellationpoint: {
1422     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1423     // global_tid, kmp_int32 cncl_kind)
1424     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1425     llvm::FunctionType *FnTy =
1426         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1427     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1428     break;
1429   }
1430   case OMPRTL__kmpc_cancel: {
1431     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1432     // kmp_int32 cncl_kind)
1433     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1434     llvm::FunctionType *FnTy =
1435         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1436     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1437     break;
1438   }
1439   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
1442     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1443         CGM.Int32Ty};
1444     llvm::FunctionType *FnTy =
1445         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1446     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1447     break;
1448   }
1449   case OMPRTL__kmpc_fork_teams: {
1450     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1451     // microtask, ...);
1452     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1453                                 getKmpc_MicroPointerTy()};
1454     llvm::FunctionType *FnTy =
1455         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1456     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1457     break;
1458   }
1459   case OMPRTL__kmpc_taskloop: {
1460     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1461     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1462     // sched, kmp_uint64 grainsize, void *task_dup);
1463     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1464                                 CGM.IntTy,
1465                                 CGM.VoidPtrTy,
1466                                 CGM.IntTy,
1467                                 CGM.Int64Ty->getPointerTo(),
1468                                 CGM.Int64Ty->getPointerTo(),
1469                                 CGM.Int64Ty,
1470                                 CGM.IntTy,
1471                                 CGM.IntTy,
1472                                 CGM.Int64Ty,
1473                                 CGM.VoidPtrTy};
1474     llvm::FunctionType *FnTy =
1475         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1476     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1477     break;
1478   }
1479   case OMPRTL__tgt_target: {
1480     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1481     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1482     // *arg_types);
1483     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1484                                 CGM.VoidPtrTy,
1485                                 CGM.Int32Ty,
1486                                 CGM.VoidPtrPtrTy,
1487                                 CGM.VoidPtrPtrTy,
1488                                 CGM.SizeTy->getPointerTo(),
1489                                 CGM.Int32Ty->getPointerTo()};
1490     llvm::FunctionType *FnTy =
1491         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1492     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1493     break;
1494   }
1495   case OMPRTL__tgt_target_teams: {
1496     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1497     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1498     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1499     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1500                                 CGM.VoidPtrTy,
1501                                 CGM.Int32Ty,
1502                                 CGM.VoidPtrPtrTy,
1503                                 CGM.VoidPtrPtrTy,
1504                                 CGM.SizeTy->getPointerTo(),
1505                                 CGM.Int32Ty->getPointerTo(),
1506                                 CGM.Int32Ty,
1507                                 CGM.Int32Ty};
1508     llvm::FunctionType *FnTy =
1509         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1510     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1511     break;
1512   }
1513   case OMPRTL__tgt_register_lib: {
1514     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1515     QualType ParamTy =
1516         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1517     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1518     llvm::FunctionType *FnTy =
1519         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1520     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1521     break;
1522   }
1523   case OMPRTL__tgt_unregister_lib: {
1524     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1525     QualType ParamTy =
1526         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1527     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1528     llvm::FunctionType *FnTy =
1529         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1530     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1531     break;
1532   }
1533   case OMPRTL__tgt_target_data_begin: {
1534     // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
1535     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1536     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1537                                 CGM.Int32Ty,
1538                                 CGM.VoidPtrPtrTy,
1539                                 CGM.VoidPtrPtrTy,
1540                                 CGM.SizeTy->getPointerTo(),
1541                                 CGM.Int32Ty->getPointerTo()};
1542     llvm::FunctionType *FnTy =
1543         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1544     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
1545     break;
1546   }
1547   case OMPRTL__tgt_target_data_end: {
1548     // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
1549     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1550     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1551                                 CGM.Int32Ty,
1552                                 CGM.VoidPtrPtrTy,
1553                                 CGM.VoidPtrPtrTy,
1554                                 CGM.SizeTy->getPointerTo(),
1555                                 CGM.Int32Ty->getPointerTo()};
1556     llvm::FunctionType *FnTy =
1557         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1558     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
1559     break;
1560   }
1561   }
1562   assert(RTLFn && "Unable to find OpenMP runtime function");
1563   return RTLFn;
1564 }
1565 
1566 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1567                                                              bool IVSigned) {
1568   assert((IVSize == 32 || IVSize == 64) &&
1569          "IV size is not compatible with the omp runtime");
1570   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1571                                        : "__kmpc_for_static_init_4u")
1572                            : (IVSigned ? "__kmpc_for_static_init_8"
1573                                        : "__kmpc_for_static_init_8u");
1574   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1576   llvm::Type *TypeParams[] = {
1577     getIdentTyPointerTy(),                     // loc
1578     CGM.Int32Ty,                               // tid
1579     CGM.Int32Ty,                               // schedtype
1580     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1581     PtrTy,                                     // p_lower
1582     PtrTy,                                     // p_upper
1583     PtrTy,                                     // p_stride
1584     ITy,                                       // incr
1585     ITy                                        // chunk
1586   };
1587   llvm::FunctionType *FnTy =
1588       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1589   return CGM.CreateRuntimeFunction(FnTy, Name);
1590 }
1591 
1592 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1593                                                             bool IVSigned) {
1594   assert((IVSize == 32 || IVSize == 64) &&
1595          "IV size is not compatible with the omp runtime");
1596   auto Name =
1597       IVSize == 32
1598           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1599           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1600   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1601   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1602                                CGM.Int32Ty,           // tid
1603                                CGM.Int32Ty,           // schedtype
1604                                ITy,                   // lower
1605                                ITy,                   // upper
1606                                ITy,                   // stride
1607                                ITy                    // chunk
1608   };
1609   llvm::FunctionType *FnTy =
1610       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1611   return CGM.CreateRuntimeFunction(FnTy, Name);
1612 }
1613 
1614 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1615                                                             bool IVSigned) {
1616   assert((IVSize == 32 || IVSize == 64) &&
1617          "IV size is not compatible with the omp runtime");
1618   auto Name =
1619       IVSize == 32
1620           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1621           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1622   llvm::Type *TypeParams[] = {
1623       getIdentTyPointerTy(), // loc
1624       CGM.Int32Ty,           // tid
1625   };
1626   llvm::FunctionType *FnTy =
1627       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1628   return CGM.CreateRuntimeFunction(FnTy, Name);
1629 }
1630 
1631 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1632                                                             bool IVSigned) {
1633   assert((IVSize == 32 || IVSize == 64) &&
1634          "IV size is not compatible with the omp runtime");
1635   auto Name =
1636       IVSize == 32
1637           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1638           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1639   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1640   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1641   llvm::Type *TypeParams[] = {
1642     getIdentTyPointerTy(),                     // loc
1643     CGM.Int32Ty,                               // tid
1644     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1645     PtrTy,                                     // p_lower
1646     PtrTy,                                     // p_upper
1647     PtrTy                                      // p_stride
1648   };
1649   llvm::FunctionType *FnTy =
1650       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1651   return CGM.CreateRuntimeFunction(FnTy, Name);
1652 }
1653 
1654 llvm::Constant *
1655 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1656   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1657          !CGM.getContext().getTargetInfo().isTLSSupported());
1658   // Lookup the entry, lazily creating it if necessary.
1659   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1660                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1661 }
1662 
1663 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1664                                                 const VarDecl *VD,
1665                                                 Address VDAddr,
1666                                                 SourceLocation Loc) {
1667   if (CGM.getLangOpts().OpenMPUseTLS &&
1668       CGM.getContext().getTargetInfo().isTLSSupported())
1669     return VDAddr;
1670 
1671   auto VarTy = VDAddr.getElementType();
1672   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1673                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1674                                                        CGM.Int8PtrTy),
1675                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1676                          getOrCreateThreadPrivateCache(VD)};
1677   return Address(CGF.EmitRuntimeCall(
1678       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1679                  VDAddr.getAlignment());
1680 }
1681 
1682 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1683     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1684     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1685   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1686   // library.
1687   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1688   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1689                       OMPLoc);
1690   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1691   // to register constructor/destructor for variable.
1692   llvm::Value *Args[] = {OMPLoc,
1693                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1694                                                        CGM.VoidPtrTy),
1695                          Ctor, CopyCtor, Dtor};
1696   CGF.EmitRuntimeCall(
1697       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1698 }
1699 
1700 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1701     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1702     bool PerformInit, CodeGenFunction *CGF) {
1703   if (CGM.getLangOpts().OpenMPUseTLS &&
1704       CGM.getContext().getTargetInfo().isTLSSupported())
1705     return nullptr;
1706 
1707   VD = VD->getDefinition(CGM.getContext());
1708   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1709     ThreadPrivateWithDefinition.insert(VD);
1710     QualType ASTTy = VD->getType();
1711 
1712     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1713     auto Init = VD->getAnyInitializer();
1714     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1715       // Generate function that re-emits the declaration's initializer into the
1716       // threadprivate copy of the variable VD
1717       CodeGenFunction CtorCGF(CGM);
1718       FunctionArgList Args;
1719       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1720                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1721       Args.push_back(&Dst);
1722 
1723       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1724           CGM.getContext().VoidPtrTy, Args);
1725       auto FTy = CGM.getTypes().GetFunctionType(FI);
1726       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1727           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1728       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1729                             Args, SourceLocation());
1730       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1731           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1732           CGM.getContext().VoidPtrTy, Dst.getLocation());
1733       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1734       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1735                                              CtorCGF.ConvertTypeForMem(ASTTy));
1736       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1737                                /*IsInitializer=*/true);
1738       ArgVal = CtorCGF.EmitLoadOfScalar(
1739           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1740           CGM.getContext().VoidPtrTy, Dst.getLocation());
1741       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1742       CtorCGF.FinishFunction();
1743       Ctor = Fn;
1744     }
1745     if (VD->getType().isDestructedType() != QualType::DK_none) {
1746       // Generate function that emits destructor call for the threadprivate copy
1747       // of the variable VD
1748       CodeGenFunction DtorCGF(CGM);
1749       FunctionArgList Args;
1750       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1751                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1752       Args.push_back(&Dst);
1753 
1754       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1755           CGM.getContext().VoidTy, Args);
1756       auto FTy = CGM.getTypes().GetFunctionType(FI);
1757       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1758           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1759       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1760       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1761                             SourceLocation());
1762       // Create a scope with an artificial location for the body of this function.
1763       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1764       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1765           DtorCGF.GetAddrOfLocalVar(&Dst),
1766           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1767       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1768                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1769                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1770       DtorCGF.FinishFunction();
1771       Dtor = Fn;
1772     }
1773     // Do not emit init function if it is not required.
1774     if (!Ctor && !Dtor)
1775       return nullptr;
1776 
1777     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1778     auto CopyCtorTy =
1779         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1780                                 /*isVarArg=*/false)->getPointerTo();
1781     // Copying constructor for the threadprivate variable.
1782     // Must be NULL - reserved by runtime, but currently it requires that this
1783     // parameter is always NULL. Otherwise it fires assertion.
1784     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1785     if (Ctor == nullptr) {
1786       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1787                                             /*isVarArg=*/false)->getPointerTo();
1788       Ctor = llvm::Constant::getNullValue(CtorTy);
1789     }
1790     if (Dtor == nullptr) {
1791       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1792                                             /*isVarArg=*/false)->getPointerTo();
1793       Dtor = llvm::Constant::getNullValue(DtorTy);
1794     }
1795     if (!CGF) {
1796       auto InitFunctionTy =
1797           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1798       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1799           InitFunctionTy, ".__omp_threadprivate_init_.",
1800           CGM.getTypes().arrangeNullaryFunction());
1801       CodeGenFunction InitCGF(CGM);
1802       FunctionArgList ArgList;
1803       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1804                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1805                             Loc);
1806       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1807       InitCGF.FinishFunction();
1808       return InitFunction;
1809     }
1810     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1811   }
1812   return nullptr;
1813 }
1814 
1815 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1816 /// function. Here is the logic:
1817 /// if (Cond) {
1818 ///   ThenGen();
1819 /// } else {
1820 ///   ElseGen();
1821 /// }
1822 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1823                             const RegionCodeGenTy &ThenGen,
1824                             const RegionCodeGenTy &ElseGen) {
1825   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1826 
1827   // If the condition constant folds and can be elided, try to avoid emitting
1828   // the condition and the dead arm of the if/else.
1829   bool CondConstant;
1830   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1831     if (CondConstant)
1832       ThenGen(CGF);
1833     else
1834       ElseGen(CGF);
1835     return;
1836   }
1837 
1838   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1839   // emit the conditional branch.
1840   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1841   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1842   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1843   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1844 
1845   // Emit the 'then' code.
1846   CGF.EmitBlock(ThenBlock);
1847   ThenGen(CGF);
1848   CGF.EmitBranch(ContBlock);
1849   // Emit the 'else' code if present.
1850   // There is no need to emit line number for unconditional branch.
1851   (void)ApplyDebugLocation::CreateEmpty(CGF);
1852   CGF.EmitBlock(ElseBlock);
1853   ElseGen(CGF);
1854   // There is no need to emit line number for unconditional branch.
1855   (void)ApplyDebugLocation::CreateEmpty(CGF);
1856   CGF.EmitBranch(ContBlock);
1857   // Emit the continuation block for code after the if.
1858   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1859 }
1860 
1861 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1862                                        llvm::Value *OutlinedFn,
1863                                        ArrayRef<llvm::Value *> CapturedVars,
1864                                        const Expr *IfCond) {
1865   if (!CGF.HaveInsertPoint())
1866     return;
1867   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1868   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1869                                                      PrePostActionTy &) {
1870     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1871     auto &RT = CGF.CGM.getOpenMPRuntime();
1872     llvm::Value *Args[] = {
1873         RTLoc,
1874         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1875         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1876     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1877     RealArgs.append(std::begin(Args), std::end(Args));
1878     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1879 
1880     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1881     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1882   };
1883   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1884                                                           PrePostActionTy &) {
1885     auto &RT = CGF.CGM.getOpenMPRuntime();
1886     auto ThreadID = RT.getThreadID(CGF, Loc);
1887     // Build calls:
1888     // __kmpc_serialized_parallel(&Loc, GTid);
1889     llvm::Value *Args[] = {RTLoc, ThreadID};
1890     CGF.EmitRuntimeCall(
1891         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1892 
1893     // OutlinedFn(&GTid, &zero, CapturedStruct);
1894     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1895     Address ZeroAddr =
1896         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1897                              /*Name*/ ".zero.addr");
1898     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1899     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1900     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1901     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1902     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1903     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1904 
1905     // __kmpc_end_serialized_parallel(&Loc, GTid);
1906     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1907     CGF.EmitRuntimeCall(
1908         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1909         EndArgs);
1910   };
1911   if (IfCond)
1912     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1913   else {
1914     RegionCodeGenTy ThenRCG(ThenGen);
1915     ThenRCG(CGF);
1916   }
1917 }
1918 
1919 // If we're inside an (outlined) parallel region, use the region info's
1920 // thread-ID variable (it is passed in a first argument of the outlined function
1921 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1922 // regular serial code region, get thread ID by calling kmp_int32
1923 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1924 // return the address of that temp.
1925 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1926                                              SourceLocation Loc) {
1927   if (auto *OMPRegionInfo =
1928           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1929     if (OMPRegionInfo->getThreadIDVariable())
1930       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1931 
1932   auto ThreadID = getThreadID(CGF, Loc);
1933   auto Int32Ty =
1934       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1935   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1936   CGF.EmitStoreOfScalar(ThreadID,
1937                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1938 
1939   return ThreadIDTemp;
1940 }
1941 
1942 llvm::Constant *
1943 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1944                                              const llvm::Twine &Name) {
1945   SmallString<256> Buffer;
1946   llvm::raw_svector_ostream Out(Buffer);
1947   Out << Name;
1948   auto RuntimeName = Out.str();
1949   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1950   if (Elem.second) {
1951     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1952            "OMP internal variable has different type than requested");
1953     return &*Elem.second;
1954   }
1955 
1956   return Elem.second = new llvm::GlobalVariable(
1957              CGM.getModule(), Ty, /*IsConstant*/ false,
1958              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1959              Elem.first());
1960 }
1961 
1962 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1963   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1964   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1965 }
1966 
1967 namespace {
1968 /// Common pre(post)-action for different OpenMP constructs.
1969 class CommonActionTy final : public PrePostActionTy {
1970   llvm::Value *EnterCallee;
1971   ArrayRef<llvm::Value *> EnterArgs;
1972   llvm::Value *ExitCallee;
1973   ArrayRef<llvm::Value *> ExitArgs;
1974   bool Conditional;
1975   llvm::BasicBlock *ContBlock = nullptr;
1976 
1977 public:
1978   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
1979                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
1980                  bool Conditional = false)
1981       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1982         ExitArgs(ExitArgs), Conditional(Conditional) {}
1983   void Enter(CodeGenFunction &CGF) override {
1984     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1985     if (Conditional) {
1986       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1987       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1988       ContBlock = CGF.createBasicBlock("omp_if.end");
1989       // Generate the branch (If-stmt)
1990       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1991       CGF.EmitBlock(ThenBlock);
1992     }
1993   }
1994   void Done(CodeGenFunction &CGF) {
1995     // Emit the rest of blocks/branches
1996     CGF.EmitBranch(ContBlock);
1997     CGF.EmitBlock(ContBlock, true);
1998   }
1999   void Exit(CodeGenFunction &CGF) override {
2000     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2001   }
2002 };
2003 } // anonymous namespace
2004 
2005 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2006                                          StringRef CriticalName,
2007                                          const RegionCodeGenTy &CriticalOpGen,
2008                                          SourceLocation Loc, const Expr *Hint) {
2009   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2010   // CriticalOpGen();
2011   // __kmpc_end_critical(ident_t *, gtid, Lock);
2012   // Prepare arguments and build a call to __kmpc_critical
2013   if (!CGF.HaveInsertPoint())
2014     return;
2015   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2016                          getCriticalRegionLock(CriticalName)};
2017   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2018                                                 std::end(Args));
2019   if (Hint) {
2020     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2021         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2022   }
2023   CommonActionTy Action(
2024       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2025                                  : OMPRTL__kmpc_critical),
2026       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2027   CriticalOpGen.setAction(Action);
2028   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2029 }
2030 
2031 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2032                                        const RegionCodeGenTy &MasterOpGen,
2033                                        SourceLocation Loc) {
2034   if (!CGF.HaveInsertPoint())
2035     return;
2036   // if(__kmpc_master(ident_t *, gtid)) {
2037   //   MasterOpGen();
2038   //   __kmpc_end_master(ident_t *, gtid);
2039   // }
2040   // Prepare arguments and build a call to __kmpc_master
2041   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2042   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2043                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2044                         /*Conditional=*/true);
2045   MasterOpGen.setAction(Action);
2046   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2047   Action.Done(CGF);
2048 }
2049 
2050 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2051                                         SourceLocation Loc) {
2052   if (!CGF.HaveInsertPoint())
2053     return;
2054   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2055   llvm::Value *Args[] = {
2056       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2057       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2058   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2059   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2060     Region->emitUntiedSwitch(CGF);
2061 }
2062 
2063 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2064                                           const RegionCodeGenTy &TaskgroupOpGen,
2065                                           SourceLocation Loc) {
2066   if (!CGF.HaveInsertPoint())
2067     return;
2068   // __kmpc_taskgroup(ident_t *, gtid);
2069   // TaskgroupOpGen();
2070   // __kmpc_end_taskgroup(ident_t *, gtid);
2071   // Prepare arguments and build a call to __kmpc_taskgroup
2072   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2073   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2074                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2075                         Args);
2076   TaskgroupOpGen.setAction(Action);
2077   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2078 }
2079 
2080 /// Given an array of pointers to variables, project the address of a
2081 /// given variable.
2082 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2083                                       unsigned Index, const VarDecl *Var) {
2084   // Pull out the pointer to the variable.
2085   Address PtrAddr =
2086       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2087   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2088 
2089   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2090   Addr = CGF.Builder.CreateElementBitCast(
2091       Addr, CGF.ConvertTypeForMem(Var->getType()));
2092   return Addr;
2093 }
2094 
2095 static llvm::Value *emitCopyprivateCopyFunction(
2096     CodeGenModule &CGM, llvm::Type *ArgsType,
2097     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2098     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2099   auto &C = CGM.getContext();
2100   // void copy_func(void *LHSArg, void *RHSArg);
2101   FunctionArgList Args;
2102   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2103                            C.VoidPtrTy);
2104   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2105                            C.VoidPtrTy);
2106   Args.push_back(&LHSArg);
2107   Args.push_back(&RHSArg);
2108   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2109   auto *Fn = llvm::Function::Create(
2110       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2111       ".omp.copyprivate.copy_func", &CGM.getModule());
2112   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2113   CodeGenFunction CGF(CGM);
2114   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2115   // Dest = (void*[n])(LHSArg);
2116   // Src = (void*[n])(RHSArg);
2117   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2118       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2119       ArgsType), CGF.getPointerAlign());
2120   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2121       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2122       ArgsType), CGF.getPointerAlign());
2123   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2124   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2125   // ...
2126   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2127   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2128     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2129     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2130 
2131     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2132     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2133 
2134     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2135     QualType Type = VD->getType();
2136     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2137   }
2138   CGF.FinishFunction();
2139   return Fn;
2140 }
2141 
2142 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2143                                        const RegionCodeGenTy &SingleOpGen,
2144                                        SourceLocation Loc,
2145                                        ArrayRef<const Expr *> CopyprivateVars,
2146                                        ArrayRef<const Expr *> SrcExprs,
2147                                        ArrayRef<const Expr *> DstExprs,
2148                                        ArrayRef<const Expr *> AssignmentOps) {
2149   if (!CGF.HaveInsertPoint())
2150     return;
2151   assert(CopyprivateVars.size() == SrcExprs.size() &&
2152          CopyprivateVars.size() == DstExprs.size() &&
2153          CopyprivateVars.size() == AssignmentOps.size());
2154   auto &C = CGM.getContext();
2155   // int32 did_it = 0;
2156   // if(__kmpc_single(ident_t *, gtid)) {
2157   //   SingleOpGen();
2158   //   __kmpc_end_single(ident_t *, gtid);
2159   //   did_it = 1;
2160   // }
2161   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2162   // <copy_func>, did_it);
2163 
2164   Address DidIt = Address::invalid();
2165   if (!CopyprivateVars.empty()) {
2166     // int32 did_it = 0;
2167     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2168     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2169     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2170   }
2171   // Prepare arguments and build a call to __kmpc_single
2172   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2173   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2174                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2175                         /*Conditional=*/true);
2176   SingleOpGen.setAction(Action);
2177   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2178   if (DidIt.isValid()) {
2179     // did_it = 1;
2180     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2181   }
2182   Action.Done(CGF);
2183   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2184   // <copy_func>, did_it);
2185   if (DidIt.isValid()) {
2186     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2187     auto CopyprivateArrayTy =
2188         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2189                                /*IndexTypeQuals=*/0);
2190     // Create a list of all private variables for copyprivate.
2191     Address CopyprivateList =
2192         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2193     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2194       Address Elem = CGF.Builder.CreateConstArrayGEP(
2195           CopyprivateList, I, CGF.getPointerSize());
2196       CGF.Builder.CreateStore(
2197           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2198               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2199           Elem);
2200     }
2201     // Build function that copies private values from single region to all other
2202     // threads in the corresponding parallel region.
2203     auto *CpyFn = emitCopyprivateCopyFunction(
2204         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2205         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2206     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2207     Address CL =
2208       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2209                                                       CGF.VoidPtrTy);
2210     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2211     llvm::Value *Args[] = {
2212         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2213         getThreadID(CGF, Loc),        // i32 <gtid>
2214         BufSize,                      // size_t <buf_size>
2215         CL.getPointer(),              // void *<copyprivate list>
2216         CpyFn,                        // void (*) (void *, void *) <copy_func>
2217         DidItVal                      // i32 did_it
2218     };
2219     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2220   }
2221 }
2222 
2223 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2224                                         const RegionCodeGenTy &OrderedOpGen,
2225                                         SourceLocation Loc, bool IsThreads) {
2226   if (!CGF.HaveInsertPoint())
2227     return;
2228   // __kmpc_ordered(ident_t *, gtid);
2229   // OrderedOpGen();
2230   // __kmpc_end_ordered(ident_t *, gtid);
2231   // Prepare arguments and build a call to __kmpc_ordered
2232   if (IsThreads) {
2233     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2234     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2235                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2236                           Args);
2237     OrderedOpGen.setAction(Action);
2238     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2239     return;
2240   }
2241   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2242 }
2243 
2244 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2245                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2246                                       bool ForceSimpleCall) {
2247   if (!CGF.HaveInsertPoint())
2248     return;
2249   // Build call __kmpc_cancel_barrier(loc, thread_id);
2250   // Build call __kmpc_barrier(loc, thread_id);
2251   unsigned Flags;
2252   if (Kind == OMPD_for)
2253     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2254   else if (Kind == OMPD_sections)
2255     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2256   else if (Kind == OMPD_single)
2257     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2258   else if (Kind == OMPD_barrier)
2259     Flags = OMP_IDENT_BARRIER_EXPL;
2260   else
2261     Flags = OMP_IDENT_BARRIER_IMPL;
2262   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2263   // thread_id);
2264   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2265                          getThreadID(CGF, Loc)};
2266   if (auto *OMPRegionInfo =
2267           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2268     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2269       auto *Result = CGF.EmitRuntimeCall(
2270           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2271       if (EmitChecks) {
2272         // if (__kmpc_cancel_barrier()) {
2273         //   exit from construct;
2274         // }
2275         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2276         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2277         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2278         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2279         CGF.EmitBlock(ExitBB);
2280         //   exit from construct;
2281         auto CancelDestination =
2282             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2283         CGF.EmitBranchThroughCleanup(CancelDestination);
2284         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2285       }
2286       return;
2287     }
2288   }
2289   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2290 }
2291 
2292 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2293 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2294                                           bool Chunked, bool Ordered) {
2295   switch (ScheduleKind) {
2296   case OMPC_SCHEDULE_static:
2297     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2298                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2299   case OMPC_SCHEDULE_dynamic:
2300     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2301   case OMPC_SCHEDULE_guided:
2302     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2303   case OMPC_SCHEDULE_runtime:
2304     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2305   case OMPC_SCHEDULE_auto:
2306     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2307   case OMPC_SCHEDULE_unknown:
2308     assert(!Chunked && "chunk was specified but schedule kind not known");
2309     return Ordered ? OMP_ord_static : OMP_sch_static;
2310   }
2311   llvm_unreachable("Unexpected runtime schedule");
2312 }
2313 
2314 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2315 static OpenMPSchedType
2316 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2317   // only static is allowed for dist_schedule
2318   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2319 }
2320 
2321 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2322                                          bool Chunked) const {
2323   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2324   return Schedule == OMP_sch_static;
2325 }
2326 
2327 bool CGOpenMPRuntime::isStaticNonchunked(
2328     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2329   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2330   return Schedule == OMP_dist_sch_static;
2331 }
2332 
2333 
2334 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2335   auto Schedule =
2336       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2337   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2338   return Schedule != OMP_sch_static;
2339 }
2340 
2341 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2342                                   OpenMPScheduleClauseModifier M1,
2343                                   OpenMPScheduleClauseModifier M2) {
2344   switch (M1) {
2345   case OMPC_SCHEDULE_MODIFIER_monotonic:
2346     return Schedule | OMP_sch_modifier_monotonic;
2347   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2348     return Schedule | OMP_sch_modifier_nonmonotonic;
2349   case OMPC_SCHEDULE_MODIFIER_simd:
2350   case OMPC_SCHEDULE_MODIFIER_last:
2351   case OMPC_SCHEDULE_MODIFIER_unknown:
2352     break;
2353   }
2354   switch (M2) {
2355   case OMPC_SCHEDULE_MODIFIER_monotonic:
2356     return Schedule | OMP_sch_modifier_monotonic;
2357   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2358     return Schedule | OMP_sch_modifier_nonmonotonic;
2359   case OMPC_SCHEDULE_MODIFIER_simd:
2360   case OMPC_SCHEDULE_MODIFIER_last:
2361   case OMPC_SCHEDULE_MODIFIER_unknown:
2362     break;
2363   }
2364   return Schedule;
2365 }
2366 
2367 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2368                                           SourceLocation Loc,
2369                                           const OpenMPScheduleTy &ScheduleKind,
2370                                           unsigned IVSize, bool IVSigned,
2371                                           bool Ordered, llvm::Value *UB,
2372                                           llvm::Value *Chunk) {
2373   if (!CGF.HaveInsertPoint())
2374     return;
2375   OpenMPSchedType Schedule =
2376       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2377   assert(Ordered ||
2378          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2379           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
2380   // Call __kmpc_dispatch_init(
2381   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2382   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2383   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2384 
2385   // If the Chunk was not specified in the clause - use default value 1.
2386   if (Chunk == nullptr)
2387     Chunk = CGF.Builder.getIntN(IVSize, 1);
2388   llvm::Value *Args[] = {
2389       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2390       CGF.Builder.getInt32(addMonoNonMonoModifier(
2391           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2392       CGF.Builder.getIntN(IVSize, 0),                   // Lower
2393       UB,                                               // Upper
2394       CGF.Builder.getIntN(IVSize, 1),                   // Stride
2395       Chunk                                             // Chunk
2396   };
2397   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2398 }
2399 
2400 static void emitForStaticInitCall(
2401     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2402     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2403     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2404     unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2405     Address ST, llvm::Value *Chunk) {
2406   if (!CGF.HaveInsertPoint())
2407      return;
2408 
2409    assert(!Ordered);
2410    assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2411           Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2412           Schedule == OMP_dist_sch_static ||
2413           Schedule == OMP_dist_sch_static_chunked);
2414 
2415    // Call __kmpc_for_static_init(
2416    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2417    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2418    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2419    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2420    if (Chunk == nullptr) {
2421      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2422              Schedule == OMP_dist_sch_static) &&
2423             "expected static non-chunked schedule");
2424      // If the Chunk was not specified in the clause - use default value 1.
2425        Chunk = CGF.Builder.getIntN(IVSize, 1);
2426    } else {
2427      assert((Schedule == OMP_sch_static_chunked ||
2428              Schedule == OMP_ord_static_chunked ||
2429              Schedule == OMP_dist_sch_static_chunked) &&
2430             "expected static chunked schedule");
2431    }
2432    llvm::Value *Args[] = {
2433        UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2434                                      Schedule, M1, M2)), // Schedule type
2435        IL.getPointer(),                                  // &isLastIter
2436        LB.getPointer(),                                  // &LB
2437        UB.getPointer(),                                  // &UB
2438        ST.getPointer(),                                  // &Stride
2439        CGF.Builder.getIntN(IVSize, 1),                   // Incr
2440        Chunk                                             // Chunk
2441    };
2442    CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2443 }
2444 
2445 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2446                                         SourceLocation Loc,
2447                                         const OpenMPScheduleTy &ScheduleKind,
2448                                         unsigned IVSize, bool IVSigned,
2449                                         bool Ordered, Address IL, Address LB,
2450                                         Address UB, Address ST,
2451                                         llvm::Value *Chunk) {
2452   OpenMPSchedType ScheduleNum =
2453       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2454   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2455   auto *ThreadId = getThreadID(CGF, Loc);
2456   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2457   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2458                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
2459                         Ordered, IL, LB, UB, ST, Chunk);
2460 }
2461 
2462 void CGOpenMPRuntime::emitDistributeStaticInit(
2463     CodeGenFunction &CGF, SourceLocation Loc,
2464     OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
2465     bool Ordered, Address IL, Address LB, Address UB, Address ST,
2466     llvm::Value *Chunk) {
2467   OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2468   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2469   auto *ThreadId = getThreadID(CGF, Loc);
2470   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2471   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2472                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2473                         OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
2474                         UB, ST, Chunk);
2475 }
2476 
2477 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2478                                           SourceLocation Loc) {
2479   if (!CGF.HaveInsertPoint())
2480     return;
2481   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2482   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2483   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2484                       Args);
2485 }
2486 
2487 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2488                                                  SourceLocation Loc,
2489                                                  unsigned IVSize,
2490                                                  bool IVSigned) {
2491   if (!CGF.HaveInsertPoint())
2492     return;
2493   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2494   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2495   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2496 }
2497 
2498 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2499                                           SourceLocation Loc, unsigned IVSize,
2500                                           bool IVSigned, Address IL,
2501                                           Address LB, Address UB,
2502                                           Address ST) {
2503   // Call __kmpc_dispatch_next(
2504   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2505   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2506   //          kmp_int[32|64] *p_stride);
2507   llvm::Value *Args[] = {
2508       emitUpdateLocation(CGF, Loc),
2509       getThreadID(CGF, Loc),
2510       IL.getPointer(), // &isLastIter
2511       LB.getPointer(), // &Lower
2512       UB.getPointer(), // &Upper
2513       ST.getPointer()  // &Stride
2514   };
2515   llvm::Value *Call =
2516       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2517   return CGF.EmitScalarConversion(
2518       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2519       CGF.getContext().BoolTy, Loc);
2520 }
2521 
2522 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2523                                            llvm::Value *NumThreads,
2524                                            SourceLocation Loc) {
2525   if (!CGF.HaveInsertPoint())
2526     return;
2527   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2528   llvm::Value *Args[] = {
2529       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2530       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2531   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2532                       Args);
2533 }
2534 
2535 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2536                                          OpenMPProcBindClauseKind ProcBind,
2537                                          SourceLocation Loc) {
2538   if (!CGF.HaveInsertPoint())
2539     return;
2540   // Constants for proc bind value accepted by the runtime.
2541   enum ProcBindTy {
2542     ProcBindFalse = 0,
2543     ProcBindTrue,
2544     ProcBindMaster,
2545     ProcBindClose,
2546     ProcBindSpread,
2547     ProcBindIntel,
2548     ProcBindDefault
2549   } RuntimeProcBind;
2550   switch (ProcBind) {
2551   case OMPC_PROC_BIND_master:
2552     RuntimeProcBind = ProcBindMaster;
2553     break;
2554   case OMPC_PROC_BIND_close:
2555     RuntimeProcBind = ProcBindClose;
2556     break;
2557   case OMPC_PROC_BIND_spread:
2558     RuntimeProcBind = ProcBindSpread;
2559     break;
2560   case OMPC_PROC_BIND_unknown:
2561     llvm_unreachable("Unsupported proc_bind value.");
2562   }
2563   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2564   llvm::Value *Args[] = {
2565       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2566       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2567   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2568 }
2569 
2570 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2571                                 SourceLocation Loc) {
2572   if (!CGF.HaveInsertPoint())
2573     return;
2574   // Build call void __kmpc_flush(ident_t *loc)
2575   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2576                       emitUpdateLocation(CGF, Loc));
2577 }
2578 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators have no explicit values, so they number the fields
/// consecutively from 0 in declaration order; reordering them changes the
/// indexes. NOTE(review): presumably this order must mirror the field order
/// of the kmp_task_t record built elsewhere in this file - confirm before
/// inserting or moving entries.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
  /// \brief (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// \brief (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// \brief (Taskloops only) Stride.
  KmpTaskTStride,
  /// \brief (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
};
} // anonymous namespace
2600 
2601 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2602   // FIXME: Add other entries type when they become supported.
2603   return OffloadEntriesTargetRegion.empty();
2604 }
2605 
2606 /// \brief Initialize target region entry.
2607 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2608     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2609                                     StringRef ParentName, unsigned LineNum,
2610                                     unsigned Order) {
2611   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2612                                              "only required for the device "
2613                                              "code generation.");
2614   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2615       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2616   ++OffloadingEntriesNum;
2617 }
2618 
2619 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2620     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2621                                   StringRef ParentName, unsigned LineNum,
2622                                   llvm::Constant *Addr, llvm::Constant *ID) {
2623   // If we are emitting code for a target, the entry is already initialized,
2624   // only has to be registered.
2625   if (CGM.getLangOpts().OpenMPIsDevice) {
2626     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2627            "Entry must exist.");
2628     auto &Entry =
2629         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2630     assert(Entry.isValid() && "Entry not initialized!");
2631     Entry.setAddress(Addr);
2632     Entry.setID(ID);
2633     return;
2634   } else {
2635     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2636     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2637   }
2638 }
2639 
2640 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2641     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2642     unsigned LineNum) const {
2643   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2644   if (PerDevice == OffloadEntriesTargetRegion.end())
2645     return false;
2646   auto PerFile = PerDevice->second.find(FileID);
2647   if (PerFile == PerDevice->second.end())
2648     return false;
2649   auto PerParentName = PerFile->second.find(ParentName);
2650   if (PerParentName == PerFile->second.end())
2651     return false;
2652   auto PerLine = PerParentName->second.find(LineNum);
2653   if (PerLine == PerParentName->second.end())
2654     return false;
2655   // Fail if this entry is already registered.
2656   if (PerLine->second.getAddress() || PerLine->second.getID())
2657     return false;
2658   return true;
2659 }
2660 
2661 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2662     const OffloadTargetRegionEntryInfoActTy &Action) {
2663   // Scan all target region entries and perform the provided action.
2664   for (auto &D : OffloadEntriesTargetRegion)
2665     for (auto &F : D.second)
2666       for (auto &P : F.second)
2667         for (auto &L : P.second)
2668           Action(D.first, F.first, P.first(), L.first, L.second);
2669 }
2670 
2671 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2672 /// \a Codegen. This is used to emit the two functions that register and
2673 /// unregister the descriptor of the current compilation unit.
2674 static llvm::Function *
2675 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2676                                          const RegionCodeGenTy &Codegen) {
2677   auto &C = CGM.getContext();
2678   FunctionArgList Args;
2679   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2680                              /*Id=*/nullptr, C.VoidPtrTy);
2681   Args.push_back(&DummyPtr);
2682 
2683   CodeGenFunction CGF(CGM);
2684   GlobalDecl();
2685   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2686   auto FTy = CGM.getTypes().GetFunctionType(FI);
2687   auto *Fn =
2688       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2689   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2690   Codegen(CGF);
2691   CGF.FinishFunction();
2692   return Fn;
2693 }
2694 
2695 llvm::Function *
2696 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2697 
2698   // If we don't have entries or if we are emitting code for the device, we
2699   // don't need to do anything.
2700   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2701     return nullptr;
2702 
2703   auto &M = CGM.getModule();
2704   auto &C = CGM.getContext();
2705 
2706   // Get list of devices we care about
2707   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2708 
2709   // We should be creating an offloading descriptor only if there are devices
2710   // specified.
2711   assert(!Devices.empty() && "No OpenMP offloading devices??");
2712 
2713   // Create the external variables that will point to the begin and end of the
2714   // host entries section. These will be defined by the linker.
2715   auto *OffloadEntryTy =
2716       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2717   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2718       M, OffloadEntryTy, /*isConstant=*/true,
2719       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2720       ".omp_offloading.entries_begin");
2721   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2722       M, OffloadEntryTy, /*isConstant=*/true,
2723       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2724       ".omp_offloading.entries_end");
2725 
2726   // Create all device images
2727   llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
2728   auto *DeviceImageTy = cast<llvm::StructType>(
2729       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2730 
2731   for (unsigned i = 0; i < Devices.size(); ++i) {
2732     StringRef T = Devices[i].getTriple();
2733     auto *ImgBegin = new llvm::GlobalVariable(
2734         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2735         /*Initializer=*/nullptr,
2736         Twine(".omp_offloading.img_start.") + Twine(T));
2737     auto *ImgEnd = new llvm::GlobalVariable(
2738         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2739         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2740 
2741     llvm::Constant *Dev =
2742         llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2743                                   HostEntriesBegin, HostEntriesEnd, nullptr);
2744     DeviceImagesEntires.push_back(Dev);
2745   }
2746 
2747   // Create device images global array.
2748   llvm::ArrayType *DeviceImagesInitTy =
2749       llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
2750   llvm::Constant *DeviceImagesInit =
2751       llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
2752 
2753   llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2754       M, DeviceImagesInitTy, /*isConstant=*/true,
2755       llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2756       ".omp_offloading.device_images");
2757   DeviceImages->setUnnamedAddr(true);
2758 
2759   // This is a Zero array to be used in the creation of the constant expressions
2760   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2761                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2762 
2763   // Create the target region descriptor.
2764   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2765       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2766   llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2767       BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2768       llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2769                                            Index),
2770       HostEntriesBegin, HostEntriesEnd, nullptr);
2771 
2772   auto *Desc = new llvm::GlobalVariable(
2773       M, BinaryDescriptorTy, /*isConstant=*/true,
2774       llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2775       ".omp_offloading.descriptor");
2776 
2777   // Emit code to register or unregister the descriptor at execution
2778   // startup or closing, respectively.
2779 
2780   // Create a variable to drive the registration and unregistration of the
2781   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2782   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2783   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2784                                 IdentInfo, C.CharTy);
2785 
2786   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2787       CGM, ".omp_offloading.descriptor_unreg",
2788       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2789         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2790                              Desc);
2791       });
2792   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2793       CGM, ".omp_offloading.descriptor_reg",
2794       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2795         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2796                              Desc);
2797         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2798       });
2799   return RegFn;
2800 }
2801 
2802 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2803                                          llvm::Constant *Addr, uint64_t Size) {
2804   StringRef Name = Addr->getName();
2805   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2806       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2807   llvm::LLVMContext &C = CGM.getModule().getContext();
2808   llvm::Module &M = CGM.getModule();
2809 
2810   // Make sure the address has the right type.
2811   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2812 
2813   // Create constant string with the name.
2814   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2815 
2816   llvm::GlobalVariable *Str =
2817       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2818                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2819                                ".omp_offloading.entry_name");
2820   Str->setUnnamedAddr(true);
2821   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2822 
2823   // Create the entry struct.
2824   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2825       TgtOffloadEntryType, AddrPtr, StrPtr,
2826       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2827   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2828       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2829       EntryInit, ".omp_offloading.entry");
2830 
2831   // The entry has to be created in the section the linker expects it to be.
2832   Entry->setSection(".omp_offloading.entries");
2833   // We can't have any padding between symbols, so we need to have 1-byte
2834   // alignment.
2835   Entry->setAlignment(1);
2836 }
2837 
2838 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2839   // Emit the offloading entries and metadata so that the device codegen side
2840   // can
2841   // easily figure out what to emit. The produced metadata looks like this:
2842   //
2843   // !omp_offload.info = !{!1, ...}
2844   //
2845   // Right now we only generate metadata for function that contain target
2846   // regions.
2847 
2848   // If we do not have entries, we dont need to do anything.
2849   if (OffloadEntriesInfoManager.empty())
2850     return;
2851 
2852   llvm::Module &M = CGM.getModule();
2853   llvm::LLVMContext &C = M.getContext();
2854   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2855       OrderedEntries(OffloadEntriesInfoManager.size());
2856 
2857   // Create the offloading info metadata node.
2858   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2859 
2860   // Auxiliar methods to create metadata values and strings.
2861   auto getMDInt = [&](unsigned v) {
2862     return llvm::ConstantAsMetadata::get(
2863         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2864   };
2865 
2866   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2867 
2868   // Create function that emits metadata for each target region entry;
2869   auto &&TargetRegionMetadataEmitter = [&](
2870       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2871       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2872     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2873     // Generate metadata for target regions. Each entry of this metadata
2874     // contains:
2875     // - Entry 0 -> Kind of this type of metadata (0).
2876     // - Entry 1 -> Device ID of the file where the entry was identified.
2877     // - Entry 2 -> File ID of the file where the entry was identified.
2878     // - Entry 3 -> Mangled name of the function where the entry was identified.
2879     // - Entry 4 -> Line in the file where the entry was identified.
2880     // - Entry 5 -> Order the entry was created.
2881     // The first element of the metadata node is the kind.
2882     Ops.push_back(getMDInt(E.getKind()));
2883     Ops.push_back(getMDInt(DeviceID));
2884     Ops.push_back(getMDInt(FileID));
2885     Ops.push_back(getMDString(ParentName));
2886     Ops.push_back(getMDInt(Line));
2887     Ops.push_back(getMDInt(E.getOrder()));
2888 
2889     // Save this entry in the right position of the ordered entries array.
2890     OrderedEntries[E.getOrder()] = &E;
2891 
2892     // Add metadata to the named metadata node.
2893     MD->addOperand(llvm::MDNode::get(C, Ops));
2894   };
2895 
2896   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2897       TargetRegionMetadataEmitter);
2898 
2899   for (auto *E : OrderedEntries) {
2900     assert(E && "All ordered entries must exist!");
2901     if (auto *CE =
2902             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2903                 E)) {
2904       assert(CE->getID() && CE->getAddress() &&
2905              "Entry ID and Addr are invalid!");
2906       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
2907     } else
2908       llvm_unreachable("Unsupported entry kind.");
2909   }
2910 }
2911 
2912 /// \brief Loads all the offload entries information from the host IR
2913 /// metadata.
2914 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
2915   // If we are in target mode, load the metadata from the host IR. This code has
2916   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
2917 
2918   if (!CGM.getLangOpts().OpenMPIsDevice)
2919     return;
2920 
2921   if (CGM.getLangOpts().OMPHostIRFile.empty())
2922     return;
2923 
2924   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
2925   if (Buf.getError())
2926     return;
2927 
2928   llvm::LLVMContext C;
2929   auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
2930 
2931   if (ME.getError())
2932     return;
2933 
2934   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
2935   if (!MD)
2936     return;
2937 
2938   for (auto I : MD->operands()) {
2939     llvm::MDNode *MN = cast<llvm::MDNode>(I);
2940 
2941     auto getMDInt = [&](unsigned Idx) {
2942       llvm::ConstantAsMetadata *V =
2943           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
2944       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
2945     };
2946 
2947     auto getMDString = [&](unsigned Idx) {
2948       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
2949       return V->getString();
2950     };
2951 
2952     switch (getMDInt(0)) {
2953     default:
2954       llvm_unreachable("Unexpected metadata!");
2955       break;
2956     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
2957         OFFLOAD_ENTRY_INFO_TARGET_REGION:
2958       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
2959           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
2960           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
2961           /*Order=*/getMDInt(5));
2962       break;
2963     }
2964   }
2965 }
2966 
2967 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2968   if (!KmpRoutineEntryPtrTy) {
2969     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2970     auto &C = CGM.getContext();
2971     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2972     FunctionProtoType::ExtProtoInfo EPI;
2973     KmpRoutineEntryPtrQTy = C.getPointerType(
2974         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2975     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2976   }
2977 }
2978 
2979 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
2980                                        QualType FieldTy) {
2981   auto *Field = FieldDecl::Create(
2982       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
2983       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
2984       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
2985   Field->setAccess(AS_public);
2986   DC->addDecl(Field);
2987   return Field;
2988 }
2989 
2990 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
2991 
2992   // Make sure the type of the entry is already created. This is the type we
2993   // have to create:
2994   // struct __tgt_offload_entry{
2995   //   void      *addr;       // Pointer to the offload entry info.
2996   //                          // (function or global)
2997   //   char      *name;       // Name of the function or global.
2998   //   size_t     size;       // Size of the entry info (0 if it a function).
2999   // };
3000   if (TgtOffloadEntryQTy.isNull()) {
3001     ASTContext &C = CGM.getContext();
3002     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3003     RD->startDefinition();
3004     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3005     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3006     addFieldToRecordDecl(C, RD, C.getSizeType());
3007     RD->completeDefinition();
3008     TgtOffloadEntryQTy = C.getRecordType(RD);
3009   }
3010   return TgtOffloadEntryQTy;
3011 }
3012 
3013 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3014   // These are the types we need to build:
3015   // struct __tgt_device_image{
3016   // void   *ImageStart;       // Pointer to the target code start.
3017   // void   *ImageEnd;         // Pointer to the target code end.
3018   // // We also add the host entries to the device image, as it may be useful
3019   // // for the target runtime to have access to that information.
3020   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3021   //                                       // the entries.
3022   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3023   //                                       // entries (non inclusive).
3024   // };
3025   if (TgtDeviceImageQTy.isNull()) {
3026     ASTContext &C = CGM.getContext();
3027     auto *RD = C.buildImplicitRecord("__tgt_device_image");
3028     RD->startDefinition();
3029     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3030     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3031     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3032     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3033     RD->completeDefinition();
3034     TgtDeviceImageQTy = C.getRecordType(RD);
3035   }
3036   return TgtDeviceImageQTy;
3037 }
3038 
3039 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3040   // struct __tgt_bin_desc{
3041   //   int32_t              NumDevices;      // Number of devices supported.
3042   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
3043   //                                         // (one per device).
3044   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3045   //                                         // entries.
3046   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3047   //                                         // entries (non inclusive).
3048   // };
3049   if (TgtBinaryDescriptorQTy.isNull()) {
3050     ASTContext &C = CGM.getContext();
3051     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3052     RD->startDefinition();
3053     addFieldToRecordDecl(
3054         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3055     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3056     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3057     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3058     RD->completeDefinition();
3059     TgtBinaryDescriptorQTy = C.getRecordType(RD);
3060   }
3061   return TgtBinaryDescriptorQTy;
3062 }
3063 
3064 namespace {
3065 struct PrivateHelpersTy {
3066   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3067                    const VarDecl *PrivateElemInit)
3068       : Original(Original), PrivateCopy(PrivateCopy),
3069         PrivateElemInit(PrivateElemInit) {}
3070   const VarDecl *Original;
3071   const VarDecl *PrivateCopy;
3072   const VarDecl *PrivateElemInit;
3073 };
3074 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3075 } // anonymous namespace
3076 
3077 static RecordDecl *
3078 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3079   if (!Privates.empty()) {
3080     auto &C = CGM.getContext();
3081     // Build struct .kmp_privates_t. {
3082     //         /*  private vars  */
3083     //       };
3084     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3085     RD->startDefinition();
3086     for (auto &&Pair : Privates) {
3087       auto *VD = Pair.second.Original;
3088       auto Type = VD->getType();
3089       Type = Type.getNonReferenceType();
3090       auto *FD = addFieldToRecordDecl(C, RD, Type);
3091       if (VD->hasAttrs()) {
3092         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3093              E(VD->getAttrs().end());
3094              I != E; ++I)
3095           FD->addAttr(*I);
3096       }
3097     }
3098     RD->completeDefinition();
3099     return RD;
3100   }
3101   return nullptr;
3102 }
3103 
3104 static RecordDecl *
3105 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3106                          QualType KmpInt32Ty,
3107                          QualType KmpRoutineEntryPointerQTy) {
3108   auto &C = CGM.getContext();
3109   // Build struct kmp_task_t {
3110   //         void *              shareds;
3111   //         kmp_routine_entry_t routine;
3112   //         kmp_int32           part_id;
3113   //         kmp_routine_entry_t destructors;
3114   // For taskloops additional fields:
3115   //         kmp_uint64          lb;
3116   //         kmp_uint64          ub;
3117   //         kmp_int64           st;
3118   //         kmp_int32           liter;
3119   //       };
3120   auto *RD = C.buildImplicitRecord("kmp_task_t");
3121   RD->startDefinition();
3122   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3123   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3124   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3125   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3126   if (isOpenMPTaskLoopDirective(Kind)) {
3127     QualType KmpUInt64Ty =
3128         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3129     QualType KmpInt64Ty =
3130         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3131     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3132     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3133     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3134     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3135   }
3136   RD->completeDefinition();
3137   return RD;
3138 }
3139 
3140 static RecordDecl *
3141 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3142                                      ArrayRef<PrivateDataTy> Privates) {
3143   auto &C = CGM.getContext();
3144   // Build struct kmp_task_t_with_privates {
3145   //         kmp_task_t task_data;
3146   //         .kmp_privates_t. privates;
3147   //       };
3148   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3149   RD->startDefinition();
3150   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3151   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3152     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3153   }
3154   RD->completeDefinition();
3155   return RD;
3156 }
3157 
3158 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3159 /// argument.
3160 /// \code
3161 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3162 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3163 ///   For taskloops:
3164 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3165 ///   tt->shareds);
3166 ///   return 0;
3167 /// }
3168 /// \endcode
3169 static llvm::Value *
3170 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3171                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3172                       QualType KmpTaskTWithPrivatesPtrQTy,
3173                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3174                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
3175                       llvm::Value *TaskPrivatesMap) {
3176   auto &C = CGM.getContext();
3177   FunctionArgList Args;
3178   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3179   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3180                                 /*Id=*/nullptr,
3181                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3182   Args.push_back(&GtidArg);
3183   Args.push_back(&TaskTypeArg);
3184   auto &TaskEntryFnInfo =
3185       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3186   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3187   auto *TaskEntry =
3188       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
3189                              ".omp_task_entry.", &CGM.getModule());
3190   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3191   CodeGenFunction CGF(CGM);
3192   CGF.disableDebugInfo();
3193   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3194 
3195   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3196   // tt,
3197   // For taskloops:
3198   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3199   // tt->task_data.shareds);
3200   auto *GtidParam = CGF.EmitLoadOfScalar(
3201       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3202   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3203       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3204       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3205   auto *KmpTaskTWithPrivatesQTyRD =
3206       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3207   LValue Base =
3208       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3209   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3210   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3211   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3212   auto *PartidParam = PartIdLVal.getPointer();
3213 
3214   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3215   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3216   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3217       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3218       CGF.ConvertTypeForMem(SharedsPtrTy));
3219 
3220   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3221   llvm::Value *PrivatesParam;
3222   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3223     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3224     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3225         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3226   } else
3227     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3228 
3229   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3230                                TaskPrivatesMap,
3231                                CGF.Builder
3232                                    .CreatePointerBitCastOrAddrSpaceCast(
3233                                        TDBase.getAddress(), CGF.VoidPtrTy)
3234                                    .getPointer()};
3235   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3236                                           std::end(CommonArgs));
3237   if (isOpenMPTaskLoopDirective(Kind)) {
3238     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3239     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3240     auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3241     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3242     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3243     auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3244     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3245     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3246     auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3247     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3248     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3249     auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3250     CallArgs.push_back(LBParam);
3251     CallArgs.push_back(UBParam);
3252     CallArgs.push_back(StParam);
3253     CallArgs.push_back(LIParam);
3254   }
3255   CallArgs.push_back(SharedsParam);
3256 
3257   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3258   CGF.EmitStoreThroughLValue(
3259       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3260       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3261   CGF.FinishFunction();
3262   return TaskEntry;
3263 }
3264 
3265 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3266                                             SourceLocation Loc,
3267                                             QualType KmpInt32Ty,
3268                                             QualType KmpTaskTWithPrivatesPtrQTy,
3269                                             QualType KmpTaskTWithPrivatesQTy) {
3270   auto &C = CGM.getContext();
3271   FunctionArgList Args;
3272   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3273   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3274                                 /*Id=*/nullptr,
3275                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3276   Args.push_back(&GtidArg);
3277   Args.push_back(&TaskTypeArg);
3278   FunctionType::ExtInfo Info;
3279   auto &DestructorFnInfo =
3280       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3281   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3282   auto *DestructorFn =
3283       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3284                              ".omp_task_destructor.", &CGM.getModule());
3285   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3286                                     DestructorFnInfo);
3287   CodeGenFunction CGF(CGM);
3288   CGF.disableDebugInfo();
3289   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3290                     Args);
3291 
3292   LValue Base = CGF.EmitLoadOfPointerLValue(
3293       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3294       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3295   auto *KmpTaskTWithPrivatesQTyRD =
3296       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3297   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3298   Base = CGF.EmitLValueForField(Base, *FI);
3299   for (auto *Field :
3300        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3301     if (auto DtorKind = Field->getType().isDestructedType()) {
3302       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3303       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3304     }
3305   }
3306   CGF.FinishFunction();
3307   return DestructorFn;
3308 }
3309 
3310 /// \brief Emit a privates mapping function for correct handling of private and
3311 /// firstprivate variables.
3312 /// \code
3313 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3314 /// **noalias priv1,...,  <tyn> **noalias privn) {
3315 ///   *priv1 = &.privates.priv1;
3316 ///   ...;
3317 ///   *privn = &.privates.privn;
3318 /// }
3319 /// \endcode
3320 static llvm::Value *
3321 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3322                                ArrayRef<const Expr *> PrivateVars,
3323                                ArrayRef<const Expr *> FirstprivateVars,
3324                                ArrayRef<const Expr *> LastprivateVars,
3325                                QualType PrivatesQTy,
3326                                ArrayRef<PrivateDataTy> Privates) {
3327   auto &C = CGM.getContext();
3328   FunctionArgList Args;
3329   ImplicitParamDecl TaskPrivatesArg(
3330       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3331       C.getPointerType(PrivatesQTy).withConst().withRestrict());
3332   Args.push_back(&TaskPrivatesArg);
3333   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3334   unsigned Counter = 1;
3335   for (auto *E: PrivateVars) {
3336     Args.push_back(ImplicitParamDecl::Create(
3337         C, /*DC=*/nullptr, Loc,
3338         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3339                             .withConst()
3340                             .withRestrict()));
3341     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3342     PrivateVarsPos[VD] = Counter;
3343     ++Counter;
3344   }
3345   for (auto *E : FirstprivateVars) {
3346     Args.push_back(ImplicitParamDecl::Create(
3347         C, /*DC=*/nullptr, Loc,
3348         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3349                             .withConst()
3350                             .withRestrict()));
3351     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3352     PrivateVarsPos[VD] = Counter;
3353     ++Counter;
3354   }
3355   for (auto *E: LastprivateVars) {
3356     Args.push_back(ImplicitParamDecl::Create(
3357         C, /*DC=*/nullptr, Loc,
3358         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3359                             .withConst()
3360                             .withRestrict()));
3361     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3362     PrivateVarsPos[VD] = Counter;
3363     ++Counter;
3364   }
3365   auto &TaskPrivatesMapFnInfo =
3366       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3367   auto *TaskPrivatesMapTy =
3368       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3369   auto *TaskPrivatesMap = llvm::Function::Create(
3370       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3371       ".omp_task_privates_map.", &CGM.getModule());
3372   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3373                                     TaskPrivatesMapFnInfo);
3374   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3375   CodeGenFunction CGF(CGM);
3376   CGF.disableDebugInfo();
3377   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3378                     TaskPrivatesMapFnInfo, Args);
3379 
3380   // *privi = &.privates.privi;
3381   LValue Base = CGF.EmitLoadOfPointerLValue(
3382       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3383       TaskPrivatesArg.getType()->castAs<PointerType>());
3384   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3385   Counter = 0;
3386   for (auto *Field : PrivatesQTyRD->fields()) {
3387     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3388     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3389     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3390     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3391         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3392     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3393     ++Counter;
3394   }
3395   CGF.FinishFunction();
3396   return TaskPrivatesMap;
3397 }
3398 
3399 static int array_pod_sort_comparator(const PrivateDataTy *P1,
3400                                      const PrivateDataTy *P2) {
3401   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3402 }
3403 
3404 /// Emit initialization for private variables in task-based directives.
3405 static void emitPrivatesInit(CodeGenFunction &CGF,
3406                              const OMPExecutableDirective &D,
3407                              Address KmpTaskSharedsPtr, LValue TDBase,
3408                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3409                              QualType SharedsTy, QualType SharedsPtrTy,
3410                              const OMPTaskDataTy &Data,
3411                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3412   auto &C = CGF.getContext();
3413   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3414   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3415   LValue SrcBase;
3416   if (!Data.FirstprivateVars.empty()) {
3417     SrcBase = CGF.MakeAddrLValue(
3418         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3419             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3420         SharedsTy);
3421   }
3422   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
3423       cast<CapturedStmt>(*D.getAssociatedStmt()));
3424   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3425   for (auto &&Pair : Privates) {
3426     auto *VD = Pair.second.PrivateCopy;
3427     auto *Init = VD->getAnyInitializer();
3428     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3429                              !CGF.isTrivialInitializer(Init)))) {
3430       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3431       if (auto *Elem = Pair.second.PrivateElemInit) {
3432         auto *OriginalVD = Pair.second.Original;
3433         auto *SharedField = CapturesInfo.lookup(OriginalVD);
3434         auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3435         SharedRefLValue = CGF.MakeAddrLValue(
3436             Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
3437             SharedRefLValue.getType(), AlignmentSource::Decl);
3438         QualType Type = OriginalVD->getType();
3439         if (Type->isArrayType()) {
3440           // Initialize firstprivate array.
3441           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3442             // Perform simple memcpy.
3443             CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
3444                                     SharedRefLValue.getAddress(), Type);
3445           } else {
3446             // Initialize firstprivate array using element-by-element
3447             // intialization.
3448             CGF.EmitOMPAggregateAssign(
3449                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3450                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3451                                                   Address SrcElement) {
3452                   // Clean up any temporaries needed by the initialization.
3453                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3454                   InitScope.addPrivate(
3455                       Elem, [SrcElement]() -> Address { return SrcElement; });
3456                   (void)InitScope.Privatize();
3457                   // Emit initialization for single element.
3458                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3459                       CGF, &CapturesInfo);
3460                   CGF.EmitAnyExprToMem(Init, DestElement,
3461                                        Init->getType().getQualifiers(),
3462                                        /*IsInitializer=*/false);
3463                 });
3464           }
3465         } else {
3466           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3467           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
3468             return SharedRefLValue.getAddress();
3469           });
3470           (void)InitScope.Privatize();
3471           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3472           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3473                              /*capturedByInit=*/false);
3474         }
3475       } else
3476         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3477     }
3478     ++FI;
3479   }
3480 }
3481 
3482 /// Check if duplication function is required for taskloops.
3483 static bool checkInitIsRequired(CodeGenFunction &CGF,
3484                                 ArrayRef<PrivateDataTy> Privates) {
3485   bool InitRequired = false;
3486   for (auto &&Pair : Privates) {
3487     auto *VD = Pair.second.PrivateCopy;
3488     auto *Init = VD->getAnyInitializer();
3489     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3490                                     !CGF.isTrivialInitializer(Init));
3491   }
3492   return InitRequired;
3493 }
3494 
3495 
3496 /// Emit task_dup function (for initialization of
3497 /// private/firstprivate/lastprivate vars and last_iter flag)
3498 /// \code
3499 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3500 /// lastpriv) {
3501 /// // setup lastprivate flag
3502 ///    task_dst->last = lastpriv;
3503 /// // could be constructor calls here...
3504 /// }
3505 /// \endcode
3506 static llvm::Value *
3507 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3508                     const OMPExecutableDirective &D,
3509                     QualType KmpTaskTWithPrivatesPtrQTy,
3510                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3511                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3512                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3513                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3514   auto &C = CGM.getContext();
3515   FunctionArgList Args;
3516   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
3517                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3518   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
3519                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3520   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
3521                                 /*Id=*/nullptr, C.IntTy);
3522   Args.push_back(&DstArg);
3523   Args.push_back(&SrcArg);
3524   Args.push_back(&LastprivArg);
3525   auto &TaskDupFnInfo =
3526       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3527   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3528   auto *TaskDup =
3529       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
3530                              ".omp_task_dup.", &CGM.getModule());
3531   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
3532   CodeGenFunction CGF(CGM);
3533   CGF.disableDebugInfo();
3534   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
3535 
3536   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3537       CGF.GetAddrOfLocalVar(&DstArg),
3538       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3539   // task_dst->liter = lastpriv;
3540   if (WithLastIter) {
3541     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3542     LValue Base = CGF.EmitLValueForField(
3543         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3544     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3545     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3546         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3547     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3548   }
3549 
3550   // Emit initial values for private copies (if any).
3551   assert(!Privates.empty());
3552   Address KmpTaskSharedsPtr = Address::invalid();
3553   if (!Data.FirstprivateVars.empty()) {
3554     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3555         CGF.GetAddrOfLocalVar(&SrcArg),
3556         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3557     LValue Base = CGF.EmitLValueForField(
3558         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3559     KmpTaskSharedsPtr = Address(
3560         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3561                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3562                                                   KmpTaskTShareds)),
3563                              Loc),
3564         CGF.getNaturalTypeAlignment(SharedsTy));
3565   }
3566   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3567                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3568   CGF.FinishFunction();
3569   return TaskDup;
3570 }
3571 
3572 /// Checks if destructor function is required to be generated.
3573 /// \return true if cleanups are required, false otherwise.
3574 static bool
3575 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3576   bool NeedsCleanup = false;
3577   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3578   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3579   for (auto *FD : PrivateRD->fields()) {
3580     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3581     if (NeedsCleanup)
3582       break;
3583   }
3584   return NeedsCleanup;
3585 }
3586 
3587 CGOpenMPRuntime::TaskResultTy
3588 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3589                               const OMPExecutableDirective &D,
3590                               llvm::Value *TaskFunction, QualType SharedsTy,
3591                               Address Shareds, const OMPTaskDataTy &Data) {
3592   auto &C = CGM.getContext();
3593   llvm::SmallVector<PrivateDataTy, 4> Privates;
3594   // Aggregate privates and sort them by the alignment.
3595   auto I = Data.PrivateCopies.begin();
3596   for (auto *E : Data.PrivateVars) {
3597     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3598     Privates.push_back(std::make_pair(
3599         C.getDeclAlign(VD),
3600         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3601                          /*PrivateElemInit=*/nullptr)));
3602     ++I;
3603   }
3604   I = Data.FirstprivateCopies.begin();
3605   auto IElemInitRef = Data.FirstprivateInits.begin();
3606   for (auto *E : Data.FirstprivateVars) {
3607     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3608     Privates.push_back(std::make_pair(
3609         C.getDeclAlign(VD),
3610         PrivateHelpersTy(
3611             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3612             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
3613     ++I;
3614     ++IElemInitRef;
3615   }
3616   I = Data.LastprivateCopies.begin();
3617   for (auto *E : Data.LastprivateVars) {
3618     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3619     Privates.push_back(std::make_pair(
3620         C.getDeclAlign(VD),
3621         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3622                          /*PrivateElemInit=*/nullptr)));
3623     ++I;
3624   }
3625   llvm::array_pod_sort(Privates.begin(), Privates.end(),
3626                        array_pod_sort_comparator);
3627   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3628   // Build type kmp_routine_entry_t (if not built yet).
3629   emitKmpRoutineEntryT(KmpInt32Ty);
3630   // Build type kmp_task_t (if not built yet).
3631   if (KmpTaskTQTy.isNull()) {
3632     KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3633         CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3634   }
3635   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3636   // Build particular struct kmp_task_t for the given task.
3637   auto *KmpTaskTWithPrivatesQTyRD =
3638       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3639   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3640   QualType KmpTaskTWithPrivatesPtrQTy =
3641       C.getPointerType(KmpTaskTWithPrivatesQTy);
3642   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3643   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
3644   auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3645   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3646 
3647   // Emit initial values for private copies (if any).
3648   llvm::Value *TaskPrivatesMap = nullptr;
3649   auto *TaskPrivatesMapTy =
3650       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
3651                 3)
3652           ->getType();
3653   if (!Privates.empty()) {
3654     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3655     TaskPrivatesMap = emitTaskPrivateMappingFunction(
3656         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
3657         FI->getType(), Privates);
3658     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3659         TaskPrivatesMap, TaskPrivatesMapTy);
3660   } else {
3661     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3662         cast<llvm::PointerType>(TaskPrivatesMapTy));
3663   }
3664   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3665   // kmp_task_t *tt);
3666   auto *TaskEntry = emitProxyTaskFunction(
3667       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3668       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3669       TaskPrivatesMap);
3670 
3671   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3672   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3673   // kmp_routine_entry_t *task_entry);
3674   // Task flags. Format is taken from
3675   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
3676   // description of kmp_tasking_flags struct.
3677   enum {
3678     TiedFlag = 0x1,
3679     FinalFlag = 0x2,
3680     DestructorsFlag = 0x8,
3681     PriorityFlag = 0x20
3682   };
3683   unsigned Flags = Data.Tied ? TiedFlag : 0;
3684   bool NeedsCleanup = false;
3685   if (!Privates.empty()) {
3686     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
3687     if (NeedsCleanup)
3688       Flags = Flags | DestructorsFlag;
3689   }
3690   if (Data.Priority.getInt())
3691     Flags = Flags | PriorityFlag;
3692   auto *TaskFlags =
3693       Data.Final.getPointer()
3694           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3695                                      CGF.Builder.getInt32(FinalFlag),
3696                                      CGF.Builder.getInt32(/*C=*/0))
3697           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3698   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3699   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3700   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
3701                               getThreadID(CGF, Loc), TaskFlags,
3702                               KmpTaskTWithPrivatesTySize, SharedsSize,
3703                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3704                                   TaskEntry, KmpRoutineEntryPtrTy)};
3705   auto *NewTask = CGF.EmitRuntimeCall(
3706       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
3707   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3708       NewTask, KmpTaskTWithPrivatesPtrTy);
3709   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3710                                                KmpTaskTWithPrivatesQTy);
3711   LValue TDBase =
3712       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3713   // Fill the data in the resulting kmp_task_t record.
3714   // Copy shareds if there are any.
3715   Address KmpTaskSharedsPtr = Address::invalid();
3716   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3717     KmpTaskSharedsPtr =
3718         Address(CGF.EmitLoadOfScalar(
3719                     CGF.EmitLValueForField(
3720                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
3721                                            KmpTaskTShareds)),
3722                     Loc),
3723                 CGF.getNaturalTypeAlignment(SharedsTy));
3724     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
3725   }
3726   // Emit initial values for private copies (if any).
3727   TaskResultTy Result;
3728   if (!Privates.empty()) {
3729     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3730                      SharedsTy, SharedsPtrTy, Data, Privates,
3731                      /*ForDup=*/false);
3732     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3733         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3734       Result.TaskDupFn = emitTaskDupFunction(
3735           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3736           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3737           /*WithLastIter=*/!Data.LastprivateVars.empty());
3738     }
3739   }
3740   // Provide pointer to function with destructors for privates.
3741   llvm::Value *DestructorFn =
3742       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
3743                                              KmpTaskTWithPrivatesPtrQTy,
3744                                              KmpTaskTWithPrivatesQTy)
3745                    : llvm::ConstantPointerNull::get(
3746                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
3747   LValue Destructor = CGF.EmitLValueForField(
3748       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
3749   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3750                             DestructorFn, KmpRoutineEntryPtrTy),
3751                         Destructor);
3752   Result.NewTask = NewTask;
3753   Result.TaskEntry = TaskEntry;
3754   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3755   Result.TDBase = TDBase;
3756   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3757   return Result;
3758 }
3759 
3760 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
3761                                    const OMPExecutableDirective &D,
3762                                    llvm::Value *TaskFunction,
3763                                    QualType SharedsTy, Address Shareds,
3764                                    const Expr *IfCond,
3765                                    const OMPTaskDataTy &Data) {
3766   if (!CGF.HaveInsertPoint())
3767     return;
3768 
3769   TaskResultTy Result =
3770       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
3771   llvm::Value *NewTask = Result.NewTask;
3772   llvm::Value *TaskEntry = Result.TaskEntry;
3773   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
3774   LValue TDBase = Result.TDBase;
3775   RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
3776   auto &C = CGM.getContext();
3777   // Process list of dependences.
3778   Address DependenciesArray = Address::invalid();
3779   unsigned NumDependencies = Data.Dependences.size();
3780   if (NumDependencies) {
3781     // Dependence kind for RTL.
3782     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
3783     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
3784     RecordDecl *KmpDependInfoRD;
3785     QualType FlagsTy =
3786         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
3787     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
3788     if (KmpDependInfoTy.isNull()) {
3789       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
3790       KmpDependInfoRD->startDefinition();
3791       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
3792       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
3793       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
3794       KmpDependInfoRD->completeDefinition();
3795       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
3796     } else
3797       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
3798     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
3799     // Define type kmp_depend_info[<Dependences.size()>];
3800     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
3801         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
3802         ArrayType::Normal, /*IndexTypeQuals=*/0);
3803     // kmp_depend_info[<Dependences.size()>] deps;
3804     DependenciesArray =
3805         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
3806     for (unsigned i = 0; i < NumDependencies; ++i) {
3807       const Expr *E = Data.Dependences[i].second;
3808       auto Addr = CGF.EmitLValue(E);
3809       llvm::Value *Size;
3810       QualType Ty = E->getType();
3811       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3812         LValue UpAddrLVal =
3813             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3814         llvm::Value *UpAddr =
3815             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3816         llvm::Value *LowIntPtr =
3817             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3818         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3819         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3820       } else
3821         Size = CGF.getTypeSize(Ty);
3822       auto Base = CGF.MakeAddrLValue(
3823           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3824           KmpDependInfoTy);
3825       // deps[i].base_addr = &<Dependences[i].second>;
3826       auto BaseAddrLVal = CGF.EmitLValueForField(
3827           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3828       CGF.EmitStoreOfScalar(
3829           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3830           BaseAddrLVal);
3831       // deps[i].len = sizeof(<Dependences[i].second>);
3832       auto LenLVal = CGF.EmitLValueForField(
3833           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3834       CGF.EmitStoreOfScalar(Size, LenLVal);
3835       // deps[i].flags = <Dependences[i].first>;
3836       RTLDependenceKindTy DepKind;
3837       switch (Data.Dependences[i].first) {
3838       case OMPC_DEPEND_in:
3839         DepKind = DepIn;
3840         break;
3841       // Out and InOut dependencies must use the same code.
3842       case OMPC_DEPEND_out:
3843       case OMPC_DEPEND_inout:
3844         DepKind = DepInOut;
3845         break;
3846       case OMPC_DEPEND_source:
3847       case OMPC_DEPEND_sink:
3848       case OMPC_DEPEND_unknown:
3849         llvm_unreachable("Unknown task dependence type");
3850       }
3851       auto FlagsLVal = CGF.EmitLValueForField(
3852           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3853       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3854                             FlagsLVal);
3855     }
3856     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3857         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3858         CGF.VoidPtrTy);
3859   }
3860 
3861   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
3862   // libcall.
3863   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3864   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3865   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3866   // list is not empty
3867   auto *ThreadID = getThreadID(CGF, Loc);
3868   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3869   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3870   llvm::Value *DepTaskArgs[7];
3871   if (NumDependencies) {
3872     DepTaskArgs[0] = UpLoc;
3873     DepTaskArgs[1] = ThreadID;
3874     DepTaskArgs[2] = NewTask;
3875     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3876     DepTaskArgs[4] = DependenciesArray.getPointer();
3877     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3878     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3879   }
3880   auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
3881                         NumDependencies, &TaskArgs,
3882                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
3883     if (!Data.Tied) {
3884       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3885       auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
3886       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
3887     }
3888     if (NumDependencies) {
3889       CGF.EmitRuntimeCall(
3890           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
3891     } else {
3892       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
3893                           TaskArgs);
3894     }
3895     // Check if parent region is untied and build return for untied task;
3896     if (auto *Region =
3897             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3898       Region->emitUntiedSwitch(CGF);
3899   };
3900 
3901   llvm::Value *DepWaitTaskArgs[6];
3902   if (NumDependencies) {
3903     DepWaitTaskArgs[0] = UpLoc;
3904     DepWaitTaskArgs[1] = ThreadID;
3905     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
3906     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
3907     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
3908     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3909   }
3910   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
3911                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
3912                                                            PrePostActionTy &) {
3913     auto &RT = CGF.CGM.getOpenMPRuntime();
3914     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
3915     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
3916     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
3917     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
3918     // is specified.
3919     if (NumDependencies)
3920       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
3921                           DepWaitTaskArgs);
3922     // Call proxy_task_entry(gtid, new_task);
3923     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
3924         CodeGenFunction &CGF, PrePostActionTy &Action) {
3925       Action.Enter(CGF);
3926       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
3927       CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
3928     };
3929 
3930     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
3931     // kmp_task_t *new_task);
3932     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
3933     // kmp_task_t *new_task);
3934     RegionCodeGenTy RCG(CodeGen);
3935     CommonActionTy Action(
3936         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
3937         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
3938     RCG.setAction(Action);
3939     RCG(CGF);
3940   };
3941 
3942   if (IfCond)
3943     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
3944   else {
3945     RegionCodeGenTy ThenRCG(ThenCodeGen);
3946     ThenRCG(CGF);
3947   }
3948 }
3949 
3950 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
3951                                        const OMPLoopDirective &D,
3952                                        llvm::Value *TaskFunction,
3953                                        QualType SharedsTy, Address Shareds,
3954                                        const Expr *IfCond,
3955                                        const OMPTaskDataTy &Data) {
3956   if (!CGF.HaveInsertPoint())
3957     return;
3958   TaskResultTy Result =
3959       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
3960   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
3961   // libcall.
3962   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
3963   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
3964   // sched, kmp_uint64 grainsize, void *task_dup);
3965   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3966   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
3967   llvm::Value *IfVal;
3968   if (IfCond) {
3969     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
3970                                       /*isSigned=*/true);
3971   } else
3972     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
3973 
3974   LValue LBLVal = CGF.EmitLValueForField(
3975       Result.TDBase,
3976       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
3977   auto *LBVar =
3978       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
3979   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
3980                        /*IsInitializer=*/true);
3981   LValue UBLVal = CGF.EmitLValueForField(
3982       Result.TDBase,
3983       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
3984   auto *UBVar =
3985       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
3986   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
3987                        /*IsInitializer=*/true);
3988   LValue StLVal = CGF.EmitLValueForField(
3989       Result.TDBase,
3990       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
3991   auto *StVar =
3992       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
3993   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
3994                        /*IsInitializer=*/true);
3995   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
3996   llvm::Value *TaskArgs[] = {
3997       UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
3998       UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
3999       llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
4000       llvm::ConstantInt::getSigned(
4001           CGF.IntTy, Data.Schedule.getPointer()
4002                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4003                          : NoSchedule),
4004       Data.Schedule.getPointer()
4005           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4006                                       /*isSigned=*/false)
4007           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4008       Result.TaskDupFn
4009           ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
4010                                                             CGF.VoidPtrTy)
4011           : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4012   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4013 }
4014 
4015 /// \brief Emit reduction operation for each element of array (required for
4016 /// array sections) LHS op = RHS.
4017 /// \param Type Type of array.
4018 /// \param LHSVar Variable on the left side of the reduction operation
4019 /// (references element of array in original variable).
4020 /// \param RHSVar Variable on the right side of the reduction operation
4021 /// (references element of array in original variable).
4022 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4023 /// RHSVar.
4024 static void EmitOMPAggregateReduction(
4025     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4026     const VarDecl *RHSVar,
4027     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4028                                   const Expr *, const Expr *)> &RedOpGen,
4029     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4030     const Expr *UpExpr = nullptr) {
4031   // Perform element-by-element initialization.
4032   QualType ElementTy;
4033   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4034   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4035 
4036   // Drill down to the base element type on both arrays.
4037   auto ArrayTy = Type->getAsArrayTypeUnsafe();
4038   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4039 
4040   auto RHSBegin = RHSAddr.getPointer();
4041   auto LHSBegin = LHSAddr.getPointer();
4042   // Cast from pointer to array type to pointer to single element.
4043   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4044   // The basic structure here is a while-do loop.
4045   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4046   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4047   auto IsEmpty =
4048       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4049   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4050 
4051   // Enter the loop body, making that address the current address.
4052   auto EntryBB = CGF.Builder.GetInsertBlock();
4053   CGF.EmitBlock(BodyBB);
4054 
4055   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4056 
4057   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4058       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4059   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4060   Address RHSElementCurrent =
4061       Address(RHSElementPHI,
4062               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4063 
4064   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4065       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4066   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4067   Address LHSElementCurrent =
4068       Address(LHSElementPHI,
4069               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4070 
4071   // Emit copy.
4072   CodeGenFunction::OMPPrivateScope Scope(CGF);
4073   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4074   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4075   Scope.Privatize();
4076   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4077   Scope.ForceCleanup();
4078 
4079   // Shift the address forward by one element.
4080   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4081       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4082   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4083       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4084   // Check whether we've reached the end.
4085   auto Done =
4086       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4087   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4088   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4089   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4090 
4091   // Done.
4092   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4093 }
4094 
4095 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4096 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4097 /// UDR combiner function.
4098 static void emitReductionCombiner(CodeGenFunction &CGF,
4099                                   const Expr *ReductionOp) {
4100   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4101     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4102       if (auto *DRE =
4103               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4104         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4105           std::pair<llvm::Function *, llvm::Function *> Reduction =
4106               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4107           RValue Func = RValue::get(Reduction.first);
4108           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4109           CGF.EmitIgnoredExpr(ReductionOp);
4110           return;
4111         }
4112   CGF.EmitIgnoredExpr(ReductionOp);
4113 }
4114 
4115 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
4116                                           llvm::Type *ArgsType,
4117                                           ArrayRef<const Expr *> Privates,
4118                                           ArrayRef<const Expr *> LHSExprs,
4119                                           ArrayRef<const Expr *> RHSExprs,
4120                                           ArrayRef<const Expr *> ReductionOps) {
4121   auto &C = CGM.getContext();
4122 
4123   // void reduction_func(void *LHSArg, void *RHSArg);
4124   FunctionArgList Args;
4125   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4126                            C.VoidPtrTy);
4127   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4128                            C.VoidPtrTy);
4129   Args.push_back(&LHSArg);
4130   Args.push_back(&RHSArg);
4131   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4132   auto *Fn = llvm::Function::Create(
4133       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4134       ".omp.reduction.reduction_func", &CGM.getModule());
4135   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4136   CodeGenFunction CGF(CGM);
4137   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4138 
4139   // Dst = (void*[n])(LHSArg);
4140   // Src = (void*[n])(RHSArg);
4141   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4142       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4143       ArgsType), CGF.getPointerAlign());
4144   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4145       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4146       ArgsType), CGF.getPointerAlign());
4147 
4148   //  ...
4149   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4150   //  ...
4151   CodeGenFunction::OMPPrivateScope Scope(CGF);
4152   auto IPriv = Privates.begin();
4153   unsigned Idx = 0;
4154   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4155     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4156     Scope.addPrivate(RHSVar, [&]() -> Address {
4157       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4158     });
4159     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4160     Scope.addPrivate(LHSVar, [&]() -> Address {
4161       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4162     });
4163     QualType PrivTy = (*IPriv)->getType();
4164     if (PrivTy->isVariablyModifiedType()) {
4165       // Get array size and emit VLA type.
4166       ++Idx;
4167       Address Elem =
4168           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4169       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4170       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4171       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4172       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4173           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4174       CGF.EmitVariablyModifiedType(PrivTy);
4175     }
4176   }
4177   Scope.Privatize();
4178   IPriv = Privates.begin();
4179   auto ILHS = LHSExprs.begin();
4180   auto IRHS = RHSExprs.begin();
4181   for (auto *E : ReductionOps) {
4182     if ((*IPriv)->getType()->isArrayType()) {
4183       // Emit reduction for array section.
4184       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4185       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4186       EmitOMPAggregateReduction(
4187           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4188           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4189             emitReductionCombiner(CGF, E);
4190           });
4191     } else
4192       // Emit reduction for array subscript or single variable.
4193       emitReductionCombiner(CGF, E);
4194     ++IPriv;
4195     ++ILHS;
4196     ++IRHS;
4197   }
4198   Scope.ForceCleanup();
4199   CGF.FinishFunction();
4200   return Fn;
4201 }
4202 
4203 static void emitSingleReductionCombiner(CodeGenFunction &CGF,
4204                                         const Expr *ReductionOp,
4205                                         const Expr *PrivateRef,
4206                                         const DeclRefExpr *LHS,
4207                                         const DeclRefExpr *RHS) {
4208   if (PrivateRef->getType()->isArrayType()) {
4209     // Emit reduction for array section.
4210     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4211     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4212     EmitOMPAggregateReduction(
4213         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4214         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4215           emitReductionCombiner(CGF, ReductionOp);
4216         });
4217   } else
4218     // Emit reduction for array subscript or single variable.
4219     emitReductionCombiner(CGF, ReductionOp);
4220 }
4221 
4222 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4223                                     ArrayRef<const Expr *> Privates,
4224                                     ArrayRef<const Expr *> LHSExprs,
4225                                     ArrayRef<const Expr *> RHSExprs,
4226                                     ArrayRef<const Expr *> ReductionOps,
4227                                     bool WithNowait, bool SimpleReduction) {
4228   if (!CGF.HaveInsertPoint())
4229     return;
4230   // Next code should be emitted for reduction:
4231   //
4232   // static kmp_critical_name lock = { 0 };
4233   //
4234   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4235   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4236   //  ...
4237   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4238   //  *(Type<n>-1*)rhs[<n>-1]);
4239   // }
4240   //
4241   // ...
4242   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4243   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4244   // RedList, reduce_func, &<lock>)) {
4245   // case 1:
4246   //  ...
4247   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4248   //  ...
4249   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4250   // break;
4251   // case 2:
4252   //  ...
4253   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4254   //  ...
4255   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4256   // break;
4257   // default:;
4258   // }
4259   //
4260   // if SimpleReduction is true, only the next code is generated:
4261   //  ...
4262   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4263   //  ...
4264 
4265   auto &C = CGM.getContext();
4266 
4267   if (SimpleReduction) {
4268     CodeGenFunction::RunCleanupsScope Scope(CGF);
4269     auto IPriv = Privates.begin();
4270     auto ILHS = LHSExprs.begin();
4271     auto IRHS = RHSExprs.begin();
4272     for (auto *E : ReductionOps) {
4273       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4274                                   cast<DeclRefExpr>(*IRHS));
4275       ++IPriv;
4276       ++ILHS;
4277       ++IRHS;
4278     }
4279     return;
4280   }
4281 
4282   // 1. Build a list of reduction variables.
4283   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4284   auto Size = RHSExprs.size();
4285   for (auto *E : Privates) {
4286     if (E->getType()->isVariablyModifiedType())
4287       // Reserve place for array size.
4288       ++Size;
4289   }
4290   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4291   QualType ReductionArrayTy =
4292       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
4293                              /*IndexTypeQuals=*/0);
4294   Address ReductionList =
4295       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4296   auto IPriv = Privates.begin();
4297   unsigned Idx = 0;
4298   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4299     Address Elem =
4300       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
4301     CGF.Builder.CreateStore(
4302         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4303             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
4304         Elem);
4305     if ((*IPriv)->getType()->isVariablyModifiedType()) {
4306       // Store array size.
4307       ++Idx;
4308       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
4309                                              CGF.getPointerSize());
4310       llvm::Value *Size = CGF.Builder.CreateIntCast(
4311           CGF.getVLASize(
4312                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
4313               .first,
4314           CGF.SizeTy, /*isSigned=*/false);
4315       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
4316                               Elem);
4317     }
4318   }
4319 
4320   // 2. Emit reduce_func().
4321   auto *ReductionFn = emitReductionFunction(
4322       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
4323       LHSExprs, RHSExprs, ReductionOps);
4324 
4325   // 3. Create static kmp_critical_name lock = { 0 };
4326   auto *Lock = getCriticalRegionLock(".reduction");
4327 
4328   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4329   // RedList, reduce_func, &<lock>);
4330   auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4331   auto *ThreadId = getThreadID(CGF, Loc);
4332   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
4333   auto *RL =
4334     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
4335                                                     CGF.VoidPtrTy);
4336   llvm::Value *Args[] = {
4337       IdentTLoc,                             // ident_t *<loc>
4338       ThreadId,                              // i32 <gtid>
4339       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
4340       ReductionArrayTySize,                  // size_type sizeof(RedList)
4341       RL,                                    // void *RedList
4342       ReductionFn, // void (*) (void *, void *) <reduce_func>
4343       Lock         // kmp_critical_name *&<lock>
4344   };
4345   auto Res = CGF.EmitRuntimeCall(
4346       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
4347                                        : OMPRTL__kmpc_reduce),
4348       Args);
4349 
4350   // 5. Build switch(res)
4351   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
4352   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
4353 
4354   // 6. Build case 1:
4355   //  ...
4356   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4357   //  ...
4358   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4359   // break;
4360   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
4361   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
4362   CGF.EmitBlock(Case1BB);
4363 
4364   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4365   llvm::Value *EndArgs[] = {
4366       IdentTLoc, // ident_t *<loc>
4367       ThreadId,  // i32 <gtid>
4368       Lock       // kmp_critical_name *&<lock>
4369   };
4370   auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4371       CodeGenFunction &CGF, PrePostActionTy &Action) {
4372     auto IPriv = Privates.begin();
4373     auto ILHS = LHSExprs.begin();
4374     auto IRHS = RHSExprs.begin();
4375     for (auto *E : ReductionOps) {
4376       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4377                                   cast<DeclRefExpr>(*IRHS));
4378       ++IPriv;
4379       ++ILHS;
4380       ++IRHS;
4381     }
4382   };
4383   RegionCodeGenTy RCG(CodeGen);
4384   CommonActionTy Action(
4385       nullptr, llvm::None,
4386       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
4387                                        : OMPRTL__kmpc_end_reduce),
4388       EndArgs);
4389   RCG.setAction(Action);
4390   RCG(CGF);
4391 
4392   CGF.EmitBranch(DefaultBB);
4393 
4394   // 7. Build case 2:
4395   //  ...
4396   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4397   //  ...
4398   // break;
4399   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
4400   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
4401   CGF.EmitBlock(Case2BB);
4402 
4403   auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4404       CodeGenFunction &CGF, PrePostActionTy &Action) {
4405     auto ILHS = LHSExprs.begin();
4406     auto IRHS = RHSExprs.begin();
4407     auto IPriv = Privates.begin();
4408     for (auto *E : ReductionOps) {
4409       const Expr *XExpr = nullptr;
4410       const Expr *EExpr = nullptr;
4411       const Expr *UpExpr = nullptr;
4412       BinaryOperatorKind BO = BO_Comma;
4413       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
4414         if (BO->getOpcode() == BO_Assign) {
4415           XExpr = BO->getLHS();
4416           UpExpr = BO->getRHS();
4417         }
4418       }
4419       // Try to emit update expression as a simple atomic.
4420       auto *RHSExpr = UpExpr;
4421       if (RHSExpr) {
4422         // Analyze RHS part of the whole expression.
4423         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
4424                 RHSExpr->IgnoreParenImpCasts())) {
4425           // If this is a conditional operator, analyze its condition for
4426           // min/max reduction operator.
4427           RHSExpr = ACO->getCond();
4428         }
4429         if (auto *BORHS =
4430                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
4431           EExpr = BORHS->getRHS();
4432           BO = BORHS->getOpcode();
4433         }
4434       }
4435       if (XExpr) {
4436         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4437         auto &&AtomicRedGen = [BO, VD, IPriv,
4438                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
4439                                     const Expr *EExpr, const Expr *UpExpr) {
4440           LValue X = CGF.EmitLValue(XExpr);
4441           RValue E;
4442           if (EExpr)
4443             E = CGF.EmitAnyExpr(EExpr);
4444           CGF.EmitOMPAtomicSimpleUpdateExpr(
4445               X, E, BO, /*IsXLHSInRHSPart=*/true,
4446               llvm::AtomicOrdering::Monotonic, Loc,
4447               [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
4448                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4449                 PrivateScope.addPrivate(
4450                     VD, [&CGF, VD, XRValue, Loc]() -> Address {
4451                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
4452                       CGF.emitOMPSimpleStore(
4453                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
4454                           VD->getType().getNonReferenceType(), Loc);
4455                       return LHSTemp;
4456                     });
4457                 (void)PrivateScope.Privatize();
4458                 return CGF.EmitAnyExpr(UpExpr);
4459               });
4460         };
4461         if ((*IPriv)->getType()->isArrayType()) {
4462           // Emit atomic reduction for array section.
4463           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4464           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
4465                                     AtomicRedGen, XExpr, EExpr, UpExpr);
4466         } else
4467           // Emit atomic reduction for array subscript or single variable.
4468           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
4469       } else {
4470         // Emit as a critical region.
4471         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
4472                                      const Expr *, const Expr *) {
4473           auto &RT = CGF.CGM.getOpenMPRuntime();
4474           RT.emitCriticalRegion(
4475               CGF, ".atomic_reduction",
4476               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
4477                 Action.Enter(CGF);
4478                 emitReductionCombiner(CGF, E);
4479               },
4480               Loc);
4481         };
4482         if ((*IPriv)->getType()->isArrayType()) {
4483           auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4484           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4485           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4486                                     CritRedGen);
4487         } else
4488           CritRedGen(CGF, nullptr, nullptr, nullptr);
4489       }
4490       ++ILHS;
4491       ++IRHS;
4492       ++IPriv;
4493     }
4494   };
4495   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
4496   if (!WithNowait) {
4497     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
4498     llvm::Value *EndArgs[] = {
4499         IdentTLoc, // ident_t *<loc>
4500         ThreadId,  // i32 <gtid>
4501         Lock       // kmp_critical_name *&<lock>
4502     };
4503     CommonActionTy Action(nullptr, llvm::None,
4504                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
4505                           EndArgs);
4506     AtomicRCG.setAction(Action);
4507     AtomicRCG(CGF);
4508   } else
4509     AtomicRCG(CGF);
4510 
4511   CGF.EmitBranch(DefaultBB);
4512   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
4513 }
4514 
4515 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4516                                        SourceLocation Loc) {
4517   if (!CGF.HaveInsertPoint())
4518     return;
4519   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4520   // global_tid);
4521   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4522   // Ignore return result until untied tasks are supported.
4523   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4524   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4525     Region->emitUntiedSwitch(CGF);
4526 }
4527 
4528 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4529                                            OpenMPDirectiveKind InnerKind,
4530                                            const RegionCodeGenTy &CodeGen,
4531                                            bool HasCancel) {
4532   if (!CGF.HaveInsertPoint())
4533     return;
4534   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4535   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4536 }
4537 
namespace {
/// Cancellation kinds. The value produced by getCancellationKind() is passed
/// as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls, so the numeric values are part of
/// the interface with the runtime library.
enum RTCancelKind {
  // Initial value in getCancellationKind(); never returned by it.
  CancelNoreq = 0,
  // Used for OMPD_parallel.
  CancelParallel = 1,
  // Used for OMPD_for.
  CancelLoop = 2,
  // Used for OMPD_sections.
  CancelSections = 3,
  // Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
4547 
4548 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4549   RTCancelKind CancelKind = CancelNoreq;
4550   if (CancelRegion == OMPD_parallel)
4551     CancelKind = CancelParallel;
4552   else if (CancelRegion == OMPD_for)
4553     CancelKind = CancelLoop;
4554   else if (CancelRegion == OMPD_sections)
4555     CancelKind = CancelSections;
4556   else {
4557     assert(CancelRegion == OMPD_taskgroup);
4558     CancelKind = CancelTaskgroup;
4559   }
4560   return CancelKind;
4561 }
4562 
4563 void CGOpenMPRuntime::emitCancellationPointCall(
4564     CodeGenFunction &CGF, SourceLocation Loc,
4565     OpenMPDirectiveKind CancelRegion) {
4566   if (!CGF.HaveInsertPoint())
4567     return;
4568   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4569   // global_tid, kmp_int32 cncl_kind);
4570   if (auto *OMPRegionInfo =
4571           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4572     if (OMPRegionInfo->hasCancel()) {
4573       llvm::Value *Args[] = {
4574           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4575           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4576       // Ignore return result until untied tasks are supported.
4577       auto *Result = CGF.EmitRuntimeCall(
4578           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4579       // if (__kmpc_cancellationpoint()) {
4580       //  __kmpc_cancel_barrier();
4581       //   exit from construct;
4582       // }
4583       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4584       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4585       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4586       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4587       CGF.EmitBlock(ExitBB);
4588       // __kmpc_cancel_barrier();
4589       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4590       // exit from construct;
4591       auto CancelDest =
4592           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4593       CGF.EmitBranchThroughCleanup(CancelDest);
4594       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4595     }
4596   }
4597 }
4598 
4599 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4600                                      const Expr *IfCond,
4601                                      OpenMPDirectiveKind CancelRegion) {
4602   if (!CGF.HaveInsertPoint())
4603     return;
4604   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4605   // kmp_int32 cncl_kind);
4606   if (auto *OMPRegionInfo =
4607           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4608     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4609                                                         PrePostActionTy &) {
4610       auto &RT = CGF.CGM.getOpenMPRuntime();
4611       llvm::Value *Args[] = {
4612           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4613           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4614       // Ignore return result until untied tasks are supported.
4615       auto *Result = CGF.EmitRuntimeCall(
4616           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4617       // if (__kmpc_cancel()) {
4618       //  __kmpc_cancel_barrier();
4619       //   exit from construct;
4620       // }
4621       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4622       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4623       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4624       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4625       CGF.EmitBlock(ExitBB);
4626       // __kmpc_cancel_barrier();
4627       RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4628       // exit from construct;
4629       auto CancelDest =
4630           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4631       CGF.EmitBranchThroughCleanup(CancelDest);
4632       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4633     };
4634     if (IfCond)
4635       emitOMPIfClause(CGF, IfCond, ThenGen,
4636                       [](CodeGenFunction &, PrePostActionTy &) {});
4637     else {
4638       RegionCodeGenTy ThenRCG(ThenGen);
4639       ThenRCG(CGF);
4640     }
4641   }
4642 }
4643 
4644 /// \brief Obtain information that uniquely identifies a target entry. This
4645 /// consists of the file and device IDs as well as line number associated with
4646 /// the relevant entry source location.
4647 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4648                                      unsigned &DeviceID, unsigned &FileID,
4649                                      unsigned &LineNum) {
4650 
4651   auto &SM = C.getSourceManager();
4652 
4653   // The loc should be always valid and have a file ID (the user cannot use
4654   // #pragma directives in macros)
4655 
4656   assert(Loc.isValid() && "Source location is expected to be always valid.");
4657   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4658 
4659   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4660   assert(PLoc.isValid() && "Source location is expected to be always valid.");
4661 
4662   llvm::sys::fs::UniqueID ID;
4663   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4664     llvm_unreachable("Source file with target region no longer exists!");
4665 
4666   DeviceID = ID.getDevice();
4667   FileID = ID.getFile();
4668   LineNum = PLoc.getLine();
4669 }
4670 
/// Emit the outlined function for a target region. This entry point only
/// checks that \p ParentName is not empty and forwards every argument
/// unchanged to emitTargetOutlinedFunctionHelper(), which does the work and
/// sets \p OutlinedFn.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // The helper builds the entry function name from the parent name.
  assert(!ParentName.empty() && "Invalid target region parent name!");

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
4680 
4681 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4682     const OMPExecutableDirective &D, StringRef ParentName,
4683     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4684     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4685   // Create a unique name for the entry function using the source location
4686   // information of the current target region. The name will be something like:
4687   //
4688   // __omp_offloading_DD_FFFF_PP_lBB
4689   //
4690   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4691   // mangled name of the function that encloses the target region and BB is the
4692   // line number of the target region.
4693 
4694   unsigned DeviceID;
4695   unsigned FileID;
4696   unsigned Line;
4697   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4698                            Line);
4699   SmallString<64> EntryFnName;
4700   {
4701     llvm::raw_svector_ostream OS(EntryFnName);
4702     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4703        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4704   }
4705 
4706   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4707 
4708   CodeGenFunction CGF(CGM, true);
4709   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4710   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4711 
4712   OutlinedFn =
4713       CGF.GenerateOpenMPCapturedStmtFunction(CS, /*CastValToPtr=*/true);
4714 
4715   // If this target outline function is not an offload entry, we don't need to
4716   // register it.
4717   if (!IsOffloadEntry)
4718     return;
4719 
4720   // The target region ID is used by the runtime library to identify the current
4721   // target region, so it only has to be unique and not necessarily point to
4722   // anything. It could be the pointer to the outlined function that implements
4723   // the target region, but we aren't using that so that the compiler doesn't
4724   // need to keep that, and could therefore inline the host function if proven
4725   // worthwhile during optimization. In the other hand, if emitting code for the
4726   // device, the ID has to be the function address so that it can retrieved from
4727   // the offloading entry and launched by the runtime library. We also mark the
4728   // outlined function to have external linkage in case we are emitting code for
4729   // the device, because these functions will be entry points to the device.
4730 
4731   if (CGM.getLangOpts().OpenMPIsDevice) {
4732     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4733     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4734   } else
4735     OutlinedFnID = new llvm::GlobalVariable(
4736         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4737         llvm::GlobalValue::PrivateLinkage,
4738         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4739 
4740   // Register the information for the entry associated with this target region.
4741   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4742       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4743 }
4744 
4745 /// discard all CompoundStmts intervening between two constructs
4746 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
4747   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
4748     Body = CS->body_front();
4749 
4750   return Body;
4751 }
4752 
4753 /// \brief Emit the num_teams clause of an enclosed teams directive at the
4754 /// target region scope. If there is no teams directive associated with the
4755 /// target directive, or if there is no num_teams clause associated with the
4756 /// enclosed teams directive, return nullptr.
4757 static llvm::Value *
4758 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4759                                      CodeGenFunction &CGF,
4760                                      const OMPExecutableDirective &D) {
4761 
4762   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4763                                               "teams directive expected to be "
4764                                               "emitted only for the host!");
4765 
4766   // FIXME: For the moment we do not support combined directives with target and
4767   // teams, so we do not expect to get any num_teams clause in the provided
4768   // directive. Once we support that, this assertion can be replaced by the
4769   // actual emission of the clause expression.
4770   assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4771          "Not expecting clause in directive.");
4772 
4773   // If the current target region has a teams region enclosed, we need to get
4774   // the number of teams to pass to the runtime function call. This is done
4775   // by generating the expression in a inlined region. This is required because
4776   // the expression is captured in the enclosing target environment when the
4777   // teams directive is not combined with target.
4778 
4779   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4780 
4781   // FIXME: Accommodate other combined directives with teams when they become
4782   // available.
4783   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4784           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4785     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4786       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4787       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4788       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4789       return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4790                                        /*IsSigned=*/true);
4791     }
4792 
4793     // If we have an enclosed teams directive but no num_teams clause we use
4794     // the default value 0.
4795     return CGF.Builder.getInt32(0);
4796   }
4797 
4798   // No teams associated with the directive.
4799   return nullptr;
4800 }
4801 
4802 /// \brief Emit the thread_limit clause of an enclosed teams directive at the
4803 /// target region scope. If there is no teams directive associated with the
4804 /// target directive, or if there is no thread_limit clause associated with the
4805 /// enclosed teams directive, return nullptr.
4806 static llvm::Value *
4807 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4808                                         CodeGenFunction &CGF,
4809                                         const OMPExecutableDirective &D) {
4810 
4811   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4812                                               "teams directive expected to be "
4813                                               "emitted only for the host!");
4814 
4815   // FIXME: For the moment we do not support combined directives with target and
4816   // teams, so we do not expect to get any thread_limit clause in the provided
4817   // directive. Once we support that, this assertion can be replaced by the
4818   // actual emission of the clause expression.
4819   assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4820          "Not expecting clause in directive.");
4821 
4822   // If the current target region has a teams region enclosed, we need to get
4823   // the thread limit to pass to the runtime function call. This is done
4824   // by generating the expression in a inlined region. This is required because
4825   // the expression is captured in the enclosing target environment when the
4826   // teams directive is not combined with target.
4827 
4828   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4829 
4830   // FIXME: Accommodate other combined directives with teams when they become
4831   // available.
4832   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4833           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4834     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4835       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4836       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4837       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4838       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4839                                        /*IsSigned=*/true);
4840     }
4841 
4842     // If we have an enclosed teams directive but no thread_limit clause we use
4843     // the default value 0.
4844     return CGF.Builder.getInt32(0);
4845   }
4846 
4847   // No teams associated with the directive.
4848   return nullptr;
4849 }
4850 
4851 namespace {
/// \brief Utility to handle information from clauses associated with a given
/// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
/// It provides a convenient interface to obtain the information and generate
/// code for that information.
4856 class MappableExprsHandler {
4857 public:
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading. The values are distinct bits (OMP_MAP_ALLOC being the absence
  /// of any bit) and are combined with bitwise OR into an unsigned value.
  enum OpenMPOffloadMappingFlags {
    /// \brief Only allocate memory on the device.
    OMP_MAP_ALLOC = 0x00,
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Decrement the reference count associated with the element without
    /// executing any other action.
    OMP_MAP_RELEASE = 0x08,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x10,
    /// \brief The element passed to the device is a pointer.
    OMP_MAP_PTR = 0x20,
    /// \brief Signal the element as extra, i.e. it is not an argument to the
    /// target region kernel.
    OMP_MAP_EXTRA = 0x40,
    /// \brief Pass the element to the device by value.
    OMP_MAP_BYCOPY = 0x80,
  };
4884 
4885   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
4886   typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
4887 
4888 private:
  /// \brief Directive from where the map clauses are extracted. It is held by
  /// reference, so it has to outlive this handler.
  const OMPExecutableDirective &Directive;

  /// \brief Function the directive is being generated for. All the code
  /// required to compute addresses and sizes is emitted through it.
  CodeGenFunction &CGF;
4894 
4895   llvm::Value *getExprTypeSize(const Expr *E) const {
4896     auto ExprTy = E->getType().getCanonicalType();
4897 
4898     // Reference types are ignored for mapping purposes.
4899     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
4900       ExprTy = RefTy->getPointeeType().getCanonicalType();
4901 
4902     // Given that an array section is considered a built-in type, we need to
4903     // do the calculation based on the length of the section instead of relying
4904     // on CGF.getTypeSize(E->getType()).
4905     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
4906       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
4907                             OAE->getBase()->IgnoreParenImpCasts())
4908                             .getCanonicalType();
4909 
4910       // If there is no length associated with the expression, that means we
4911       // are using the whole length of the base.
4912       if (!OAE->getLength() && OAE->getColonLoc().isValid())
4913         return CGF.getTypeSize(BaseTy);
4914 
4915       llvm::Value *ElemSize;
4916       if (auto *PTy = BaseTy->getAs<PointerType>())
4917         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
4918       else {
4919         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
4920         assert(ATy && "Expecting array type if not a pointer type.");
4921         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
4922       }
4923 
4924       // If we don't have a length at this point, that is because we have an
4925       // array section with a single element.
4926       if (!OAE->getLength())
4927         return ElemSize;
4928 
4929       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
4930       LengthVal =
4931           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
4932       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
4933     }
4934     return CGF.getTypeSize(ExprTy);
4935   }
4936 
4937   /// \brief Return the corresponding bits for a given map clause modifier. Add
4938   /// a flag marking the map as a pointer if requested. Add a flag marking the
4939   /// map as extra, meaning is not an argument of the kernel.
4940   unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
4941                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
4942                           bool AddExtraFlag) const {
4943     unsigned Bits = 0u;
4944     switch (MapType) {
4945     case OMPC_MAP_alloc:
4946       Bits = OMP_MAP_ALLOC;
4947       break;
4948     case OMPC_MAP_to:
4949       Bits = OMP_MAP_TO;
4950       break;
4951     case OMPC_MAP_from:
4952       Bits = OMP_MAP_FROM;
4953       break;
4954     case OMPC_MAP_tofrom:
4955       Bits = OMP_MAP_TO | OMP_MAP_FROM;
4956       break;
4957     case OMPC_MAP_delete:
4958       Bits = OMP_MAP_DELETE;
4959       break;
4960     case OMPC_MAP_release:
4961       Bits = OMP_MAP_RELEASE;
4962       break;
4963     default:
4964       llvm_unreachable("Unexpected map type!");
4965       break;
4966     }
4967     if (AddPtrFlag)
4968       Bits |= OMP_MAP_PTR;
4969     if (AddExtraFlag)
4970       Bits |= OMP_MAP_EXTRA;
4971     if (MapTypeModifier == OMPC_MAP_always)
4972       Bits |= OMP_MAP_ALWAYS;
4973     return Bits;
4974   }
4975 
4976   /// \brief Return true if the provided expression is a final array section. A
4977   /// final array section, is one whose length can't be proved to be one.
4978   bool isFinalArraySectionExpression(const Expr *E) const {
4979     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
4980 
4981     // It is not an array section and therefore not a unity-size one.
4982     if (!OASE)
4983       return false;
4984 
4985     // An array section with no colon always refer to a single element.
4986     if (OASE->getColonLoc().isInvalid())
4987       return false;
4988 
4989     auto *Length = OASE->getLength();
4990 
4991     // If we don't have a length we have to check if the array has size 1
4992     // for this dimension. Also, we should always expect a length if the
4993     // base type is pointer.
4994     if (!Length) {
4995       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
4996                          OASE->getBase()->IgnoreParenImpCasts())
4997                          .getCanonicalType();
4998       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
4999         return ATy->getSize().getSExtValue() != 1;
5000       // If we don't have a constant dimension length, we have to consider
5001       // the current section as having any size, so it is not necessarily
5002       // unitary. If it happen to be unity size, that's user fault.
5003       return true;
5004     }
5005 
5006     // Check if the length evaluates to 1.
5007     llvm::APSInt ConstLength;
5008     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
5009       return true; // Can have more that size 1.
5010 
5011     return ConstLength.getSExtValue() != 1;
5012   }
5013 
  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// The generated entries are appended to \a BasePointers, \a Pointers,
  /// \a Sizes and \a Types, which are kept in lockstep (one element each per
  /// entry). \a IsFirstComponentList should be set to true if the provided set
  /// of components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22])
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
    // extra_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;

    // Scan the components from the base to the complete expression. The list
    // is traversed with reverse iterators, so the base is visited first.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
               .getPointer();

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference.
      if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() &&
          std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(
            BP, I->getAssociatedDeclaration()->getType());
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         I->getAssociatedDeclaration()
                                             ->getType()
                                             ->getAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // Save the base we are currently using.
        BasePointers.push_back(BP);

        // The section pointer is the address of the current component and the
        // size is the one computed for its expression.
        auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        Pointers.push_back(LB);
        Sizes.push_back(Size);
        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We need to add the extra
        // flag for each map that relates with the current capture, except for
        // the first one (there is a set of entries for each capture).
        Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo,
                                       !IsCaptureFirstInfo));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        // Any further entry is no longer the first one for this expression
        // nor for this capture.
        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        continue;
      }
    }
  }
5244 
5245 public:
  /// \brief Create a handler for the map clauses of directive \a Dir that
  /// emits code through \a CGF. Both arguments are stored by reference.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : Directive(Dir), CGF(CGF) {}
5248 
5249   /// \brief Generate all the base pointers, section pointers, sizes and map
5250   /// types for the extracted mappable expressions.
5251   void generateAllInfo(MapValuesArrayTy &BasePointers,
5252                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
5253                        MapFlagsArrayTy &Types) const {
5254     BasePointers.clear();
5255     Pointers.clear();
5256     Sizes.clear();
5257     Types.clear();
5258 
5259     struct MapInfo {
5260       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
5261       OpenMPMapClauseKind MapType;
5262       OpenMPMapClauseKind MapTypeModifier;
5263     };
5264 
5265     // We have to process the component lists that relate with the same
5266     // declaration in a single chunk so that we can generate the map flags
5267     // correctly. Therefore, we organize all lists in a map.
5268     llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
5269     for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5270       for (auto L : C->component_lists()) {
5271         const ValueDecl *VD =
5272             L.first ? cast<ValueDecl>(L.first->getCanonicalDecl()) : nullptr;
5273         Info[VD].push_back(
5274             {L.second, C->getMapType(), C->getMapTypeModifier()});
5275       }
5276 
5277     for (auto &M : Info) {
5278       // We need to know when we generate information for the first component
5279       // associated with a capture, because the mapping flags depend on it.
5280       bool IsFirstComponentList = true;
5281       for (MapInfo &L : M.second) {
5282         assert(!L.Components.empty() &&
5283                "Not expecting declaration with no component lists.");
5284         generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
5285                                      BasePointers, Pointers, Sizes, Types,
5286                                      IsFirstComponentList);
5287         IsFirstComponentList = false;
5288       }
5289     }
5290   }
5291 
5292   /// \brief Generate the base pointers, section pointers, sizes and map types
5293   /// associated to a given capture.
5294   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
5295                               MapValuesArrayTy &BasePointers,
5296                               MapValuesArrayTy &Pointers,
5297                               MapValuesArrayTy &Sizes,
5298                               MapFlagsArrayTy &Types) const {
5299     assert(!Cap->capturesVariableArrayType() &&
5300            "Not expecting to generate map info for a variable array type!");
5301 
5302     BasePointers.clear();
5303     Pointers.clear();
5304     Sizes.clear();
5305     Types.clear();
5306 
5307     const ValueDecl *VD =
5308         Cap->capturesThis()
5309             ? nullptr
5310             : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
5311 
5312     // We need to know when we generating information for the first component
5313     // associated with a capture, because the mapping flags depend on it.
5314     bool IsFirstComponentList = true;
5315     for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5316       for (auto L : C->decl_component_lists(VD)) {
5317         assert(L.first == VD &&
5318                "We got information for the wrong declaration??");
5319         assert(!L.second.empty() &&
5320                "Not expecting declaration with no component lists.");
5321         generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
5322                                      L.second, BasePointers, Pointers, Sizes,
5323                                      Types, IsFirstComponentList);
5324         IsFirstComponentList = false;
5325       }
5326 
5327     return;
5328   }
5329 };
5330 
/// \brief Device IDs with a reserved meaning for the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID used when no device was specified; the runtime should
  /// then get it from the environment variables defined in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
5336 } // anonymous namespace
5337 
5338 /// \brief Emit the arrays used to pass the captures and map information to the
5339 /// offloading runtime library. If there is no map or capture information,
5340 /// return nullptr by reference.
5341 static void
5342 emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
5343                      llvm::Value *&PointersArray, llvm::Value *&SizesArray,
5344                      llvm::Value *&MapTypesArray,
5345                      MappableExprsHandler::MapValuesArrayTy &BasePointers,
5346                      MappableExprsHandler::MapValuesArrayTy &Pointers,
5347                      MappableExprsHandler::MapValuesArrayTy &Sizes,
5348                      MappableExprsHandler::MapFlagsArrayTy &MapTypes) {
5349   auto &CGM = CGF.CGM;
5350   auto &Ctx = CGF.getContext();
5351 
5352   BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr;
5353 
5354   if (unsigned PointerNumVal = BasePointers.size()) {
5355     // Detect if we have any capture size requiring runtime evaluation of the
5356     // size so that a constant array could be eventually used.
5357     bool hasRuntimeEvaluationCaptureSize = false;
5358     for (auto *S : Sizes)
5359       if (!isa<llvm::Constant>(S)) {
5360         hasRuntimeEvaluationCaptureSize = true;
5361         break;
5362       }
5363 
5364     llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
5365     QualType PointerArrayType =
5366         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
5367                                  /*IndexTypeQuals=*/0);
5368 
5369     BasePointersArray =
5370         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
5371     PointersArray =
5372         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
5373 
5374     // If we don't have any VLA types or other types that require runtime
5375     // evaluation, we can use a constant array for the map sizes, otherwise we
5376     // need to fill up the arrays as we do for the pointers.
5377     if (hasRuntimeEvaluationCaptureSize) {
5378       QualType SizeArrayType = Ctx.getConstantArrayType(
5379           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
5380           /*IndexTypeQuals=*/0);
5381       SizesArray =
5382           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
5383     } else {
5384       // We expect all the sizes to be constant, so we collect them to create
5385       // a constant array.
5386       SmallVector<llvm::Constant *, 16> ConstSizes;
5387       for (auto S : Sizes)
5388         ConstSizes.push_back(cast<llvm::Constant>(S));
5389 
5390       auto *SizesArrayInit = llvm::ConstantArray::get(
5391           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
5392       auto *SizesArrayGbl = new llvm::GlobalVariable(
5393           CGM.getModule(), SizesArrayInit->getType(),
5394           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5395           SizesArrayInit, ".offload_sizes");
5396       SizesArrayGbl->setUnnamedAddr(true);
5397       SizesArray = SizesArrayGbl;
5398     }
5399 
5400     // The map types are always constant so we don't need to generate code to
5401     // fill arrays. Instead, we create an array constant.
5402     llvm::Constant *MapTypesArrayInit =
5403         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
5404     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
5405         CGM.getModule(), MapTypesArrayInit->getType(),
5406         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5407         MapTypesArrayInit, ".offload_maptypes");
5408     MapTypesArrayGbl->setUnnamedAddr(true);
5409     MapTypesArray = MapTypesArrayGbl;
5410 
5411     for (unsigned i = 0; i < PointerNumVal; ++i) {
5412       llvm::Value *BPVal = BasePointers[i];
5413       if (BPVal->getType()->isPointerTy())
5414         BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
5415       else {
5416         assert(BPVal->getType()->isIntegerTy() &&
5417                "If not a pointer, the value type must be an integer.");
5418         BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
5419       }
5420       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
5421           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
5422           0, i);
5423       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5424       CGF.Builder.CreateStore(BPVal, BPAddr);
5425 
5426       llvm::Value *PVal = Pointers[i];
5427       if (PVal->getType()->isPointerTy())
5428         PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
5429       else {
5430         assert(PVal->getType()->isIntegerTy() &&
5431                "If not a pointer, the value type must be an integer.");
5432         PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
5433       }
5434       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
5435           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
5436           i);
5437       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5438       CGF.Builder.CreateStore(PVal, PAddr);
5439 
5440       if (hasRuntimeEvaluationCaptureSize) {
5441         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
5442             llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
5443             /*Idx0=*/0,
5444             /*Idx1=*/i);
5445         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
5446         CGF.Builder.CreateStore(
5447             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
5448             SAddr);
5449       }
5450     }
5451   }
5452 }
5453 /// \brief Emit the arguments to be passed to the runtime library based on the
5454 /// arrays of pointers, sizes and map types.
5455 static void emitOffloadingArraysArgument(
5456     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
5457     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
5458     llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray,
5459     llvm::Value *PointersArray, llvm::Value *SizesArray,
5460     llvm::Value *MapTypesArray, unsigned NumElems) {
5461   auto &CGM = CGF.CGM;
5462   if (NumElems) {
5463     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5464         llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray,
5465         /*Idx0=*/0, /*Idx1=*/0);
5466     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5467         llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray,
5468         /*Idx0=*/0,
5469         /*Idx1=*/0);
5470     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5471         llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray,
5472         /*Idx0=*/0, /*Idx1=*/0);
5473     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5474         llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray,
5475         /*Idx0=*/0,
5476         /*Idx1=*/0);
5477   } else {
5478     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5479     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5480     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
5481     MapTypesArrayArg =
5482         llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
5483   }
5484 }
5485 
5486 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
5487                                      const OMPExecutableDirective &D,
5488                                      llvm::Value *OutlinedFn,
5489                                      llvm::Value *OutlinedFnID,
5490                                      const Expr *IfCond, const Expr *Device,
5491                                      ArrayRef<llvm::Value *> CapturedVars) {
5492   if (!CGF.HaveInsertPoint())
5493     return;
5494 
5495   assert(OutlinedFn && "Invalid outlined function!");
5496 
5497   auto &Ctx = CGF.getContext();
5498 
5499   // Fill up the arrays with all the captured variables.
5500   MappableExprsHandler::MapValuesArrayTy KernelArgs;
5501   MappableExprsHandler::MapValuesArrayTy BasePointers;
5502   MappableExprsHandler::MapValuesArrayTy Pointers;
5503   MappableExprsHandler::MapValuesArrayTy Sizes;
5504   MappableExprsHandler::MapFlagsArrayTy MapTypes;
5505 
5506   MappableExprsHandler::MapValuesArrayTy CurBasePointers;
5507   MappableExprsHandler::MapValuesArrayTy CurPointers;
5508   MappableExprsHandler::MapValuesArrayTy CurSizes;
5509   MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
5510 
5511   // Get map clause information.
5512   MappableExprsHandler MCHandler(D, CGF);
5513 
5514   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5515   auto RI = CS.getCapturedRecordDecl()->field_begin();
5516   auto CV = CapturedVars.begin();
5517   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
5518                                             CE = CS.capture_end();
5519        CI != CE; ++CI, ++RI, ++CV) {
5520     StringRef Name;
5521     QualType Ty;
5522 
5523     CurBasePointers.clear();
5524     CurPointers.clear();
5525     CurSizes.clear();
5526     CurMapTypes.clear();
5527 
5528     // VLA sizes are passed to the outlined region by copy and do not have map
5529     // information associated.
5530     if (CI->capturesVariableArrayType()) {
5531       CurBasePointers.push_back(*CV);
5532       CurPointers.push_back(*CV);
5533       CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5534       // Copy to the device as an argument. No need to retrieve it.
5535       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_BYCOPY);
5536     } else {
5537       // If we have any information in the map clause, we use it, otherwise we
5538       // just do a default mapping.
5539       MCHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers,
5540                                        CurSizes, CurMapTypes);
5541 
5542       if (CurBasePointers.empty()) {
5543         // Do the default mapping.
5544         if (CI->capturesThis()) {
5545           CurBasePointers.push_back(*CV);
5546           CurPointers.push_back(*CV);
5547           const PointerType *PtrTy =
5548               cast<PointerType>(RI->getType().getTypePtr());
5549           CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
5550           // Default map type.
5551           CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO |
5552                                 MappableExprsHandler::OMP_MAP_FROM);
5553         } else if (CI->capturesVariableByCopy()) {
5554           CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_BYCOPY);
5555           if (!RI->getType()->isAnyPointerType()) {
5556             // If the field is not a pointer, we need to save the actual value
5557             // and load it as a void pointer.
5558             auto DstAddr = CGF.CreateMemTemp(
5559                 Ctx.getUIntPtrType(),
5560                 Twine(CI->getCapturedVar()->getName()) + ".casted");
5561             LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
5562 
5563             auto *SrcAddrVal = CGF.EmitScalarConversion(
5564                 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
5565                 Ctx.getPointerType(RI->getType()), SourceLocation());
5566             LValue SrcLV =
5567                 CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
5568 
5569             // Store the value using the source type pointer.
5570             CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
5571 
5572             // Load the value using the destination type pointer.
5573             CurBasePointers.push_back(
5574                 CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal());
5575             CurPointers.push_back(CurBasePointers.back());
5576           } else {
5577             CurBasePointers.push_back(*CV);
5578             CurPointers.push_back(*CV);
5579           }
5580           CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5581         } else {
5582           assert(CI->capturesVariable() && "Expected captured reference.");
5583           CurBasePointers.push_back(*CV);
5584           CurPointers.push_back(*CV);
5585 
5586           const ReferenceType *PtrTy =
5587               cast<ReferenceType>(RI->getType().getTypePtr());
5588           QualType ElementType = PtrTy->getPointeeType();
5589           CurSizes.push_back(CGF.getTypeSize(ElementType));
5590           // The default map type for a scalar/complex type is 'to' because by
5591           // default the value doesn't have to be retrieved. For an aggregate
5592           // type,
5593           // the default is 'tofrom'.
5594           CurMapTypes.push_back(ElementType->isAggregateType()
5595                                     ? (MappableExprsHandler::OMP_MAP_TO |
5596                                        MappableExprsHandler::OMP_MAP_FROM)
5597                                     : MappableExprsHandler::OMP_MAP_TO);
5598         }
5599       }
5600     }
5601     // We expect to have at least an element of information for this capture.
5602     assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
5603     assert(CurBasePointers.size() == CurPointers.size() &&
5604            CurBasePointers.size() == CurSizes.size() &&
5605            CurBasePointers.size() == CurMapTypes.size() &&
5606            "Inconsistent map information sizes!");
5607 
5608     // The kernel args are always the first elements of the base pointers
5609     // associated with a capture.
5610     KernelArgs.push_back(CurBasePointers.front());
5611     // We need to append the results of this capture to what we already have.
5612     BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
5613     Pointers.append(CurPointers.begin(), CurPointers.end());
5614     Sizes.append(CurSizes.begin(), CurSizes.end());
5615     MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
5616   }
5617 
5618   // Keep track on whether the host function has to be executed.
5619   auto OffloadErrorQType =
5620       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
5621   auto OffloadError = CGF.MakeAddrLValue(
5622       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
5623       OffloadErrorQType);
5624   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
5625                         OffloadError);
5626 
5627   // Fill up the pointer arrays and transfer execution to the device.
5628   auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
5629                     OutlinedFnID, OffloadError, OffloadErrorQType,
5630                     &D](CodeGenFunction &CGF, PrePostActionTy &) {
5631     auto &RT = CGF.CGM.getOpenMPRuntime();
5632     // Emit the offloading arrays.
5633     llvm::Value *BasePointersArray;
5634     llvm::Value *PointersArray;
5635     llvm::Value *SizesArray;
5636     llvm::Value *MapTypesArray;
5637     emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
5638                          MapTypesArray, BasePointers, Pointers, Sizes,
5639                          MapTypes);
5640     emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
5641                                  SizesArray, MapTypesArray, BasePointersArray,
5642                                  PointersArray, SizesArray, MapTypesArray,
5643                                  BasePointers.size());
5644 
5645     // On top of the arrays that were filled up, the target offloading call
5646     // takes as arguments the device id as well as the host pointer. The host
5647     // pointer is used by the runtime library to identify the current target
5648     // region, so it only has to be unique and not necessarily point to
5649     // anything. It could be the pointer to the outlined function that
5650     // implements the target region, but we aren't using that so that the
5651     // compiler doesn't need to keep that, and could therefore inline the host
5652     // function if proven worthwhile during optimization.
5653 
5654     // From this point on, we need to have an ID of the target region defined.
5655     assert(OutlinedFnID && "Invalid outlined function ID!");
5656 
5657     // Emit device ID if any.
5658     llvm::Value *DeviceID;
5659     if (Device)
5660       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5661                                            CGF.Int32Ty, /*isSigned=*/true);
5662     else
5663       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
5664 
5665     // Emit the number of elements in the offloading arrays.
5666     llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
5667 
5668     // Return value of the runtime offloading call.
5669     llvm::Value *Return;
5670 
5671     auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
5672     auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
5673 
5674     // If we have NumTeams defined this means that we have an enclosed teams
5675     // region. Therefore we also expect to have ThreadLimit defined. These two
5676     // values should be defined in the presence of a teams directive, regardless
5677     // of having any clauses associated. If the user is using teams but no
5678     // clauses, these two values will be the default that should be passed to
5679     // the runtime library - a 32-bit integer with the value zero.
5680     if (NumTeams) {
5681       assert(ThreadLimit && "Thread limit expression should be available along "
5682                             "with number of teams.");
5683       llvm::Value *OffloadingArgs[] = {
5684           DeviceID,          OutlinedFnID,  PointerNum,
5685           BasePointersArray, PointersArray, SizesArray,
5686           MapTypesArray,     NumTeams,      ThreadLimit};
5687       Return = CGF.EmitRuntimeCall(
5688           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
5689     } else {
5690       llvm::Value *OffloadingArgs[] = {
5691           DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
5692           PointersArray, SizesArray,   MapTypesArray};
5693       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
5694                                    OffloadingArgs);
5695     }
5696 
5697     CGF.EmitStoreOfScalar(Return, OffloadError);
5698   };
5699 
5700   // Notify that the host version must be executed.
5701   auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
5702     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
5703                           OffloadError);
5704   };
5705 
5706   // If we have a target function ID it means that we need to support
5707   // offloading, otherwise, just execute on the host. We need to execute on host
5708   // regardless of the conditional in the if clause if, e.g., the user do not
5709   // specify target triples.
5710   if (OutlinedFnID) {
5711     if (IfCond)
5712       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
5713     else {
5714       RegionCodeGenTy ThenRCG(ThenGen);
5715       ThenRCG(CGF);
5716     }
5717   } else {
5718     RegionCodeGenTy ElseRCG(ElseGen);
5719     ElseRCG(CGF);
5720   }
5721 
5722   // Check the error code and execute the host version if required.
5723   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
5724   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
5725   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
5726   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
5727   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
5728 
5729   CGF.EmitBlock(OffloadFailedBlock);
5730   CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
5731   CGF.EmitBranch(OffloadContBlock);
5732 
5733   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
5734 }
5735 
5736 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
5737                                                     StringRef ParentName) {
5738   if (!S)
5739     return;
5740 
5741   // If we find a OMP target directive, codegen the outline function and
5742   // register the result.
5743   // FIXME: Add other directives with target when they become supported.
5744   bool isTargetDirective = isa<OMPTargetDirective>(S);
5745 
5746   if (isTargetDirective) {
5747     auto *E = cast<OMPExecutableDirective>(S);
5748     unsigned DeviceID;
5749     unsigned FileID;
5750     unsigned Line;
5751     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
5752                              FileID, Line);
5753 
5754     // Is this a target region that should not be emitted as an entry point? If
5755     // so just signal we are done with this target region.
5756     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
5757                                                             ParentName, Line))
5758       return;
5759 
5760     llvm::Function *Fn;
5761     llvm::Constant *Addr;
5762     std::tie(Fn, Addr) =
5763         CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
5764             CGM, cast<OMPTargetDirective>(*E), ParentName,
5765             /*isOffloadEntry=*/true);
5766     assert(Fn && Addr && "Target region emission failed.");
5767     return;
5768   }
5769 
5770   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
5771     if (!E->hasAssociatedStmt())
5772       return;
5773 
5774     scanForTargetRegionsFunctions(
5775         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
5776         ParentName);
5777     return;
5778   }
5779 
5780   // If this is a lambda function, look into its body.
5781   if (auto *L = dyn_cast<LambdaExpr>(S))
5782     S = L->getBody();
5783 
5784   // Keep looking for target regions recursively.
5785   for (auto *II : S->children())
5786     scanForTargetRegionsFunctions(II, ParentName);
5787 }
5788 
5789 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
5790   auto &FD = *cast<FunctionDecl>(GD.getDecl());
5791 
5792   // If emitting code for the host, we do not process FD here. Instead we do
5793   // the normal code generation.
5794   if (!CGM.getLangOpts().OpenMPIsDevice)
5795     return false;
5796 
5797   // Try to detect target regions in the function.
5798   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
5799 
5800   // We should not emit any function othen that the ones created during the
5801   // scanning. Therefore, we signal that this function is completely dealt
5802   // with.
5803   return true;
5804 }
5805 
5806 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
5807   if (!CGM.getLangOpts().OpenMPIsDevice)
5808     return false;
5809 
5810   // Check if there are Ctors/Dtors in this declaration and look for target
5811   // regions in it. We use the complete variant to produce the kernel name
5812   // mangling.
5813   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
5814   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
5815     for (auto *Ctor : RD->ctors()) {
5816       StringRef ParentName =
5817           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
5818       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
5819     }
5820     auto *Dtor = RD->getDestructor();
5821     if (Dtor) {
5822       StringRef ParentName =
5823           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
5824       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
5825     }
5826   }
5827 
5828   // If we are in target mode we do not emit any global (declare target is not
5829   // implemented yet). Therefore we signal that GD was processed in this case.
5830   return true;
5831 }
5832 
5833 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
5834   auto *VD = GD.getDecl();
5835   if (isa<FunctionDecl>(VD))
5836     return emitTargetFunctions(GD);
5837 
5838   return emitTargetGlobalVariable(GD);
5839 }
5840 
5841 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
5842   // If we have offloading in the current module, we need to emit the entries
5843   // now and register the offloading descriptor.
5844   createOffloadEntriesAndInfoMetadata();
5845 
5846   // Create and register the offloading binary descriptors. This is the main
5847   // entity that captures all the information about offloading in the current
5848   // compilation unit.
5849   return createOffloadingBinaryDescriptorRegistration();
5850 }
5851 
5852 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
5853                                     const OMPExecutableDirective &D,
5854                                     SourceLocation Loc,
5855                                     llvm::Value *OutlinedFn,
5856                                     ArrayRef<llvm::Value *> CapturedVars) {
5857   if (!CGF.HaveInsertPoint())
5858     return;
5859 
5860   auto *RTLoc = emitUpdateLocation(CGF, Loc);
5861   CodeGenFunction::RunCleanupsScope Scope(CGF);
5862 
5863   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
5864   llvm::Value *Args[] = {
5865       RTLoc,
5866       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
5867       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
5868   llvm::SmallVector<llvm::Value *, 16> RealArgs;
5869   RealArgs.append(std::begin(Args), std::end(Args));
5870   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
5871 
5872   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
5873   CGF.EmitRuntimeCall(RTLFn, RealArgs);
5874 }
5875 
5876 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
5877                                          const Expr *NumTeams,
5878                                          const Expr *ThreadLimit,
5879                                          SourceLocation Loc) {
5880   if (!CGF.HaveInsertPoint())
5881     return;
5882 
5883   auto *RTLoc = emitUpdateLocation(CGF, Loc);
5884 
5885   llvm::Value *NumTeamsVal =
5886       (NumTeams)
5887           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
5888                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
5889           : CGF.Builder.getInt32(0);
5890 
5891   llvm::Value *ThreadLimitVal =
5892       (ThreadLimit)
5893           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
5894                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
5895           : CGF.Builder.getInt32(0);
5896 
5897   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
5898   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
5899                                      ThreadLimitVal};
5900   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
5901                       PushNumTeamsArgs);
5902 }
5903 
5904 void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
5905                                           const OMPExecutableDirective &D,
5906                                           const Expr *IfCond,
5907                                           const Expr *Device,
5908                                           const RegionCodeGenTy &CodeGen) {
5909 
5910   if (!CGF.HaveInsertPoint())
5911     return;
5912 
5913   llvm::Value *BasePointersArray = nullptr;
5914   llvm::Value *PointersArray = nullptr;
5915   llvm::Value *SizesArray = nullptr;
5916   llvm::Value *MapTypesArray = nullptr;
5917   unsigned NumOfPtrs = 0;
5918 
5919   // Generate the code for the opening of the data environment. Capture all the
5920   // arguments of the runtime call by reference because they are used in the
5921   // closing of the region.
5922   auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray,
5923                          &SizesArray, &MapTypesArray, Device,
5924                          &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
5925     // Fill up the arrays with all the mapped variables.
5926     MappableExprsHandler::MapValuesArrayTy BasePointers;
5927     MappableExprsHandler::MapValuesArrayTy Pointers;
5928     MappableExprsHandler::MapValuesArrayTy Sizes;
5929     MappableExprsHandler::MapFlagsArrayTy MapTypes;
5930 
5931     // Get map clause information.
5932     MappableExprsHandler MCHandler(D, CGF);
5933     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
5934     NumOfPtrs = BasePointers.size();
5935 
5936     // Fill up the arrays and create the arguments.
5937     emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
5938                          MapTypesArray, BasePointers, Pointers, Sizes,
5939                          MapTypes);
5940 
5941     llvm::Value *BasePointersArrayArg = nullptr;
5942     llvm::Value *PointersArrayArg = nullptr;
5943     llvm::Value *SizesArrayArg = nullptr;
5944     llvm::Value *MapTypesArrayArg = nullptr;
5945     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
5946                                  SizesArrayArg, MapTypesArrayArg,
5947                                  BasePointersArray, PointersArray, SizesArray,
5948                                  MapTypesArray, NumOfPtrs);
5949 
5950     // Emit device ID if any.
5951     llvm::Value *DeviceID = nullptr;
5952     if (Device)
5953       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5954                                            CGF.Int32Ty, /*isSigned=*/true);
5955     else
5956       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
5957 
5958     // Emit the number of elements in the offloading arrays.
5959     auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
5960 
5961     llvm::Value *OffloadingArgs[] = {
5962         DeviceID,         PointerNum,    BasePointersArrayArg,
5963         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
5964     auto &RT = CGF.CGM.getOpenMPRuntime();
5965     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
5966                         OffloadingArgs);
5967   };
5968 
5969   // Generate code for the closing of the data region.
5970   auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray,
5971                        &MapTypesArray, Device,
5972                        &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
5973     assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
5974            NumOfPtrs && "Invalid data environment closing arguments.");
5975 
5976     llvm::Value *BasePointersArrayArg = nullptr;
5977     llvm::Value *PointersArrayArg = nullptr;
5978     llvm::Value *SizesArrayArg = nullptr;
5979     llvm::Value *MapTypesArrayArg = nullptr;
5980     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
5981                                  SizesArrayArg, MapTypesArrayArg,
5982                                  BasePointersArray, PointersArray, SizesArray,
5983                                  MapTypesArray, NumOfPtrs);
5984 
5985     // Emit device ID if any.
5986     llvm::Value *DeviceID = nullptr;
5987     if (Device)
5988       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5989                                            CGF.Int32Ty, /*isSigned=*/true);
5990     else
5991       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
5992 
5993     // Emit the number of elements in the offloading arrays.
5994     auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
5995 
5996     llvm::Value *OffloadingArgs[] = {
5997         DeviceID,         PointerNum,    BasePointersArrayArg,
5998         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
5999     auto &RT = CGF.CGM.getOpenMPRuntime();
6000     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
6001                         OffloadingArgs);
6002   };
6003 
6004   // In the event we get an if clause, we don't have to take any action on the
6005   // else side.
6006   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6007 
6008   if (IfCond) {
6009     emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen);
6010   } else {
6011     RegionCodeGenTy BeginThenRCG(BeginThenGen);
6012     BeginThenRCG(CGF);
6013   }
6014 
6015   CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen);
6016 
6017   if (IfCond) {
6018     emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen);
6019   } else {
6020     RegionCodeGenTy EndThenRCG(EndThenGen);
6021     EndThenRCG(CGF);
6022   }
6023 }
6024 
6025 void CGOpenMPRuntime::emitTargetEnterOrExitDataCall(
6026     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6027     const Expr *Device) {
6028   if (!CGF.HaveInsertPoint())
6029     return;
6030 
6031   assert((isa<OMPTargetEnterDataDirective>(D) ||
6032           isa<OMPTargetExitDataDirective>(D)) &&
6033          "Expecting either target enter or exit data directives.");
6034 
6035   // Generate the code for the opening of the data environment.
6036   auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
6037     // Fill up the arrays with all the mapped variables.
6038     MappableExprsHandler::MapValuesArrayTy BasePointers;
6039     MappableExprsHandler::MapValuesArrayTy Pointers;
6040     MappableExprsHandler::MapValuesArrayTy Sizes;
6041     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6042 
6043     // Get map clause information.
6044     MappableExprsHandler MCHandler(D, CGF);
6045     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6046 
6047     llvm::Value *BasePointersArrayArg = nullptr;
6048     llvm::Value *PointersArrayArg = nullptr;
6049     llvm::Value *SizesArrayArg = nullptr;
6050     llvm::Value *MapTypesArrayArg = nullptr;
6051 
6052     // Fill up the arrays and create the arguments.
6053     emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg,
6054                          SizesArrayArg, MapTypesArrayArg, BasePointers,
6055                          Pointers, Sizes, MapTypes);
6056     emitOffloadingArraysArgument(
6057         CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6058         MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6059         MapTypesArrayArg, BasePointers.size());
6060 
6061     // Emit device ID if any.
6062     llvm::Value *DeviceID = nullptr;
6063     if (Device)
6064       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6065                                            CGF.Int32Ty, /*isSigned=*/true);
6066     else
6067       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6068 
6069     // Emit the number of elements in the offloading arrays.
6070     auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6071 
6072     llvm::Value *OffloadingArgs[] = {
6073         DeviceID,         PointerNum,    BasePointersArrayArg,
6074         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6075     auto &RT = CGF.CGM.getOpenMPRuntime();
6076     CGF.EmitRuntimeCall(
6077         RT.createRuntimeFunction(isa<OMPTargetEnterDataDirective>(D)
6078                                      ? OMPRTL__tgt_target_data_begin
6079                                      : OMPRTL__tgt_target_data_end),
6080         OffloadingArgs);
6081   };
6082 
6083   // In the event we get an if clause, we don't have to take any action on the
6084   // else side.
6085   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6086 
6087   if (IfCond) {
6088     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6089   } else {
6090     RegionCodeGenTy ThenGenRCG(ThenGen);
6091     ThenGenRCG(CGF);
6092   }
6093 }
6094 
6095 namespace {
6096   /// Kind of parameter in a function with 'declare simd' directive.
6097   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
6098   /// Attribute set of the parameter.
6099   struct ParamAttrTy {
6100     ParamKindTy Kind = Vector;
6101     llvm::APSInt StrideOrArg;
6102     llvm::APSInt Alignment;
6103   };
6104 } // namespace
6105 
6106 static unsigned evaluateCDTSize(const FunctionDecl *FD,
6107                                 ArrayRef<ParamAttrTy> ParamAttrs) {
6108   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
6109   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
6110   // of that clause. The VLEN value must be power of 2.
6111   // In other case the notion of the function`s "characteristic data type" (CDT)
6112   // is used to compute the vector length.
6113   // CDT is defined in the following order:
6114   //   a) For non-void function, the CDT is the return type.
6115   //   b) If the function has any non-uniform, non-linear parameters, then the
6116   //   CDT is the type of the first such parameter.
6117   //   c) If the CDT determined by a) or b) above is struct, union, or class
6118   //   type which is pass-by-value (except for the type that maps to the
6119   //   built-in complex data type), the characteristic data type is int.
6120   //   d) If none of the above three cases is applicable, the CDT is int.
6121   // The VLEN is then determined based on the CDT and the size of vector
6122   // register of that ISA for which current vector version is generated. The
6123   // VLEN is computed using the formula below:
6124   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
6125   // where vector register size specified in section 3.2.1 Registers and the
6126   // Stack Frame of original AMD64 ABI document.
6127   QualType RetType = FD->getReturnType();
6128   if (RetType.isNull())
6129     return 0;
6130   ASTContext &C = FD->getASTContext();
6131   QualType CDT;
6132   if (!RetType.isNull() && !RetType->isVoidType())
6133     CDT = RetType;
6134   else {
6135     unsigned Offset = 0;
6136     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
6137       if (ParamAttrs[Offset].Kind == Vector)
6138         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
6139       ++Offset;
6140     }
6141     if (CDT.isNull()) {
6142       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
6143         if (ParamAttrs[I + Offset].Kind == Vector) {
6144           CDT = FD->getParamDecl(I)->getType();
6145           break;
6146         }
6147       }
6148     }
6149   }
6150   if (CDT.isNull())
6151     CDT = C.IntTy;
6152   CDT = CDT->getCanonicalTypeUnqualified();
6153   if (CDT->isRecordType() || CDT->isUnionType())
6154     CDT = C.IntTy;
6155   return C.getTypeSize(CDT);
6156 }
6157 
6158 static void
6159 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
6160                            llvm::APSInt VLENVal,
6161                            ArrayRef<ParamAttrTy> ParamAttrs,
6162                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
6163   struct ISADataTy {
6164     char ISA;
6165     unsigned VecRegSize;
6166   };
6167   ISADataTy ISAData[] = {
6168       {
6169           'b', 128
6170       }, // SSE
6171       {
6172           'c', 256
6173       }, // AVX
6174       {
6175           'd', 256
6176       }, // AVX2
6177       {
6178           'e', 512
6179       }, // AVX512
6180   };
6181   llvm::SmallVector<char, 2> Masked;
6182   switch (State) {
6183   case OMPDeclareSimdDeclAttr::BS_Undefined:
6184     Masked.push_back('N');
6185     Masked.push_back('M');
6186     break;
6187   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
6188     Masked.push_back('N');
6189     break;
6190   case OMPDeclareSimdDeclAttr::BS_Inbranch:
6191     Masked.push_back('M');
6192     break;
6193   }
6194   for (auto Mask : Masked) {
6195     for (auto &Data : ISAData) {
6196       SmallString<256> Buffer;
6197       llvm::raw_svector_ostream Out(Buffer);
6198       Out << "_ZGV" << Data.ISA << Mask;
6199       if (!VLENVal) {
6200         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
6201                                          evaluateCDTSize(FD, ParamAttrs));
6202       } else
6203         Out << VLENVal;
6204       for (auto &ParamAttr : ParamAttrs) {
6205         switch (ParamAttr.Kind){
6206         case LinearWithVarStride:
6207           Out << 's' << ParamAttr.StrideOrArg;
6208           break;
6209         case Linear:
6210           Out << 'l';
6211           if (!!ParamAttr.StrideOrArg)
6212             Out << ParamAttr.StrideOrArg;
6213           break;
6214         case Uniform:
6215           Out << 'u';
6216           break;
6217         case Vector:
6218           Out << 'v';
6219           break;
6220         }
6221         if (!!ParamAttr.Alignment)
6222           Out << 'a' << ParamAttr.Alignment;
6223       }
6224       Out << '_' << Fn->getName();
6225       Fn->addFnAttr(Out.str());
6226     }
6227   }
6228 }
6229 
6230 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
6231                                               llvm::Function *Fn) {
6232   ASTContext &C = CGM.getContext();
6233   FD = FD->getCanonicalDecl();
6234   // Map params to their positions in function decl.
6235   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
6236   if (isa<CXXMethodDecl>(FD))
6237     ParamPositions.insert({FD, 0});
6238   unsigned ParamPos = ParamPositions.size();
6239   for (auto *P : FD->params()) {
6240     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
6241     ++ParamPos;
6242   }
6243   for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
6244     llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
6245     // Mark uniform parameters.
6246     for (auto *E : Attr->uniforms()) {
6247       E = E->IgnoreParenImpCasts();
6248       unsigned Pos;
6249       if (isa<CXXThisExpr>(E))
6250         Pos = ParamPositions[FD];
6251       else {
6252         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6253                         ->getCanonicalDecl();
6254         Pos = ParamPositions[PVD];
6255       }
6256       ParamAttrs[Pos].Kind = Uniform;
6257     }
6258     // Get alignment info.
6259     auto NI = Attr->alignments_begin();
6260     for (auto *E : Attr->aligneds()) {
6261       E = E->IgnoreParenImpCasts();
6262       unsigned Pos;
6263       QualType ParmTy;
6264       if (isa<CXXThisExpr>(E)) {
6265         Pos = ParamPositions[FD];
6266         ParmTy = E->getType();
6267       } else {
6268         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6269                         ->getCanonicalDecl();
6270         Pos = ParamPositions[PVD];
6271         ParmTy = PVD->getType();
6272       }
6273       ParamAttrs[Pos].Alignment =
6274           (*NI) ? (*NI)->EvaluateKnownConstInt(C)
6275                 : llvm::APSInt::getUnsigned(
6276                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
6277                           .getQuantity());
6278       ++NI;
6279     }
6280     // Mark linear parameters.
6281     auto SI = Attr->steps_begin();
6282     auto MI = Attr->modifiers_begin();
6283     for (auto *E : Attr->linears()) {
6284       E = E->IgnoreParenImpCasts();
6285       unsigned Pos;
6286       if (isa<CXXThisExpr>(E))
6287         Pos = ParamPositions[FD];
6288       else {
6289         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6290                         ->getCanonicalDecl();
6291         Pos = ParamPositions[PVD];
6292       }
6293       auto &ParamAttr = ParamAttrs[Pos];
6294       ParamAttr.Kind = Linear;
6295       if (*SI) {
6296         if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
6297                                   Expr::SE_AllowSideEffects)) {
6298           if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
6299             if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
6300               ParamAttr.Kind = LinearWithVarStride;
6301               ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
6302                   ParamPositions[StridePVD->getCanonicalDecl()]);
6303             }
6304           }
6305         }
6306       }
6307       ++SI;
6308       ++MI;
6309     }
6310     llvm::APSInt VLENVal;
6311     if (const Expr *VLEN = Attr->getSimdlen())
6312       VLENVal = VLEN->EvaluateKnownConstInt(C);
6313     OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
6314     if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
6315         CGM.getTriple().getArch() == llvm::Triple::x86_64)
6316       emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
6317   }
6318 }
6319