1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "ConstantBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/IR/CallSite.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// \brief Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// \brief Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// \brief Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// \brief Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// \brief Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// \brief Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// \brief Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// \brief Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// \brief Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// \brief API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel)
103       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
104                            HasCancel),
105         ThreadIDVar(ThreadIDVar) {
106     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
107   }
108 
109   /// \brief Get a variable or parameter for storing global thread id
110   /// inside OpenMP construct.
111   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
112 
113   /// \brief Get the name of the capture helper.
114   StringRef getHelperName() const override { return ".omp_outlined."; }
115 
116   static bool classof(const CGCapturedStmtInfo *Info) {
117     return CGOpenMPRegionInfo::classof(Info) &&
118            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
119                ParallelOutlinedRegion;
120   }
121 
122 private:
123   /// \brief A variable or parameter storing global thread id for OpenMP
124   /// constructs.
125   const VarDecl *ThreadIDVar;
126 };
127 
128 /// \brief API for captured statement code generation in OpenMP constructs.
129 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
130 public:
131   class UntiedTaskActionTy final : public PrePostActionTy {
132     bool Untied;
133     const VarDecl *PartIDVar;
134     const RegionCodeGenTy UntiedCodeGen;
135     llvm::SwitchInst *UntiedSwitch = nullptr;
136 
137   public:
138     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
139                        const RegionCodeGenTy &UntiedCodeGen)
140         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
141     void Enter(CodeGenFunction &CGF) override {
142       if (Untied) {
143         // Emit task switching point.
144         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
145             CGF.GetAddrOfLocalVar(PartIDVar),
146             PartIDVar->getType()->castAs<PointerType>());
147         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
148         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
149         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
150         CGF.EmitBlock(DoneBB);
151         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
152         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
153         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
154                               CGF.Builder.GetInsertBlock());
155         emitUntiedSwitch(CGF);
156       }
157     }
158     void emitUntiedSwitch(CodeGenFunction &CGF) const {
159       if (Untied) {
160         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
161             CGF.GetAddrOfLocalVar(PartIDVar),
162             PartIDVar->getType()->castAs<PointerType>());
163         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
164                               PartIdLVal);
165         UntiedCodeGen(CGF);
166         CodeGenFunction::JumpDest CurPoint =
167             CGF.getJumpDestInCurrentScope(".untied.next.");
168         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
169         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
170         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
171                               CGF.Builder.GetInsertBlock());
172         CGF.EmitBranchThroughCleanup(CurPoint);
173         CGF.EmitBlock(CurPoint.getBlock());
174       }
175     }
176     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
177   };
178   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
179                                  const VarDecl *ThreadIDVar,
180                                  const RegionCodeGenTy &CodeGen,
181                                  OpenMPDirectiveKind Kind, bool HasCancel,
182                                  const UntiedTaskActionTy &Action)
183       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
184         ThreadIDVar(ThreadIDVar), Action(Action) {
185     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
186   }
187 
188   /// \brief Get a variable or parameter for storing global thread id
189   /// inside OpenMP construct.
190   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
191 
192   /// \brief Get an LValue for the current ThreadID variable.
193   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
194 
195   /// \brief Get the name of the capture helper.
196   StringRef getHelperName() const override { return ".omp_outlined."; }
197 
198   void emitUntiedSwitch(CodeGenFunction &CGF) override {
199     Action.emitUntiedSwitch(CGF);
200   }
201 
202   static bool classof(const CGCapturedStmtInfo *Info) {
203     return CGOpenMPRegionInfo::classof(Info) &&
204            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
205                TaskOutlinedRegion;
206   }
207 
208 private:
209   /// \brief A variable or parameter storing global thread id for OpenMP
210   /// constructs.
211   const VarDecl *ThreadIDVar;
212   /// Action for emitting code for untied tasks.
213   const UntiedTaskActionTy &Action;
214 };
215 
216 /// \brief API for inlined captured statement code generation in OpenMP
217 /// constructs.
218 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
219 public:
220   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
221                             const RegionCodeGenTy &CodeGen,
222                             OpenMPDirectiveKind Kind, bool HasCancel)
223       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
224         OldCSI(OldCSI),
225         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
226 
227   // \brief Retrieve the value of the context parameter.
228   llvm::Value *getContextValue() const override {
229     if (OuterRegionInfo)
230       return OuterRegionInfo->getContextValue();
231     llvm_unreachable("No context value for inlined OpenMP region");
232   }
233 
234   void setContextValue(llvm::Value *V) override {
235     if (OuterRegionInfo) {
236       OuterRegionInfo->setContextValue(V);
237       return;
238     }
239     llvm_unreachable("No context value for inlined OpenMP region");
240   }
241 
242   /// \brief Lookup the captured field decl for a variable.
243   const FieldDecl *lookup(const VarDecl *VD) const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->lookup(VD);
246     // If there is no outer outlined region,no need to lookup in a list of
247     // captured variables, we can use the original one.
248     return nullptr;
249   }
250 
251   FieldDecl *getThisFieldDecl() const override {
252     if (OuterRegionInfo)
253       return OuterRegionInfo->getThisFieldDecl();
254     return nullptr;
255   }
256 
257   /// \brief Get a variable or parameter for storing global thread id
258   /// inside OpenMP construct.
259   const VarDecl *getThreadIDVariable() const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->getThreadIDVariable();
262     return nullptr;
263   }
264 
265   /// \brief Get the name of the capture helper.
266   StringRef getHelperName() const override {
267     if (auto *OuterRegionInfo = getOldCSI())
268       return OuterRegionInfo->getHelperName();
269     llvm_unreachable("No helper name for inlined OpenMP construct");
270   }
271 
272   void emitUntiedSwitch(CodeGenFunction &CGF) override {
273     if (OuterRegionInfo)
274       OuterRegionInfo->emitUntiedSwitch(CGF);
275   }
276 
277   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
278 
279   static bool classof(const CGCapturedStmtInfo *Info) {
280     return CGOpenMPRegionInfo::classof(Info) &&
281            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
282   }
283 
284   ~CGOpenMPInlinedRegionInfo() override = default;
285 
286 private:
287   /// \brief CodeGen info about outer OpenMP region.
288   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
289   CGOpenMPRegionInfo *OuterRegionInfo;
290 };
291 
292 /// \brief API for captured statement code generation in OpenMP target
293 /// constructs. For this captures, implicit parameters are used instead of the
294 /// captured fields. The name of the target region has to be unique in a given
295 /// application so it is provided by the client, because only the client has
296 /// the information to generate that.
297 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
298 public:
299   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
300                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
301       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
302                            /*HasCancel=*/false),
303         HelperName(HelperName) {}
304 
305   /// \brief This is unused for target regions because each starts executing
306   /// with a single thread.
307   const VarDecl *getThreadIDVariable() const override { return nullptr; }
308 
309   /// \brief Get the name of the capture helper.
310   StringRef getHelperName() const override { return HelperName; }
311 
312   static bool classof(const CGCapturedStmtInfo *Info) {
313     return CGOpenMPRegionInfo::classof(Info) &&
314            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
315   }
316 
317 private:
318   StringRef HelperName;
319 };
320 
321 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
322   llvm_unreachable("No codegen for expressions");
323 }
324 /// \brief API for generation of expressions captured in a innermost OpenMP
325 /// region.
326 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
327 public:
328   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
329       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
330                                   OMPD_unknown,
331                                   /*HasCancel=*/false),
332         PrivScope(CGF) {
333     // Make sure the globals captured in the provided statement are local by
334     // using the privatization logic. We assume the same variable is not
335     // captured more than once.
336     for (auto &C : CS.captures()) {
337       if (!C.capturesVariable() && !C.capturesVariableByCopy())
338         continue;
339 
340       const VarDecl *VD = C.getCapturedVar();
341       if (VD->isLocalVarDeclOrParm())
342         continue;
343 
344       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
345                       /*RefersToEnclosingVariableOrCapture=*/false,
346                       VD->getType().getNonReferenceType(), VK_LValue,
347                       SourceLocation());
348       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
349         return CGF.EmitLValue(&DRE).getAddress();
350       });
351     }
352     (void)PrivScope.Privatize();
353   }
354 
355   /// \brief Lookup the captured field decl for a variable.
356   const FieldDecl *lookup(const VarDecl *VD) const override {
357     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
358       return FD;
359     return nullptr;
360   }
361 
362   /// \brief Emit the captured statement body.
363   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
364     llvm_unreachable("No body for expressions");
365   }
366 
367   /// \brief Get a variable or parameter for storing global thread id
368   /// inside OpenMP construct.
369   const VarDecl *getThreadIDVariable() const override {
370     llvm_unreachable("No thread id for expressions");
371   }
372 
373   /// \brief Get the name of the capture helper.
374   StringRef getHelperName() const override {
375     llvm_unreachable("No helper name for expressions");
376   }
377 
378   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
379 
380 private:
381   /// Private scope to capture global variables.
382   CodeGenFunction::OMPPrivateScope PrivScope;
383 };
384 
385 /// \brief RAII for emitting code of OpenMP constructs.
386 class InlinedOpenMPRegionRAII {
387   CodeGenFunction &CGF;
388   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
389   FieldDecl *LambdaThisCaptureField = nullptr;
390 
391 public:
392   /// \brief Constructs region for combined constructs.
393   /// \param CodeGen Code generation sequence for combined directives. Includes
394   /// a list of functions used for code generation of implicitly inlined
395   /// regions.
396   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
397                           OpenMPDirectiveKind Kind, bool HasCancel)
398       : CGF(CGF) {
399     // Start emission for the construct.
400     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
401         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
402     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
403     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
404     CGF.LambdaThisCaptureField = nullptr;
405   }
406 
407   ~InlinedOpenMPRegionRAII() {
408     // Restore original CapturedStmtInfo only if we're done with code emission.
409     auto *OldCSI =
410         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
411     delete CGF.CapturedStmtInfo;
412     CGF.CapturedStmtInfo = OldCSI;
413     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
414     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
415   }
416 };
417 
/// Bit flags stored in the flags field of ident_t.
/// All enumerators are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as the generic implicit
  /// barrier flag).
  OMP_IDENT_BARRIER_IMPL_FOR = OMP_IDENT_BARRIER_IMPL,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};
439 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Field 0: might be used in Fortran.
  IdentField_Reserved_1,
  /// Field 1: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union
  /// member.
  IdentField_Flags,
  /// Field 2: not really used in Fortran any more.
  IdentField_Reserved_2,
  /// Field 3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// Field 4: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
480 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Default schedule; an alias of the unordered static schedule.
  OMP_sch_default = OMP_sch_static,

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
512 
/// Identifiers for the runtime library entry points referenced by this file.
/// Each enumerator is documented with the prototype of the runtime function
/// it stands for. The enumerators take consecutive values starting at zero.
enum OpenMPRTLFunction {
  /// void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// void __kmpc_threadprivate_register(ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  /// int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  /// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
670 
671 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
672 /// region.
673 class CleanupTy final : public EHScopeStack::Cleanup {
674   PrePostActionTy *Action;
675 
676 public:
677   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
678   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
679     if (!CGF.HaveInsertPoint())
680       return;
681     Action->Exit(CGF);
682   }
683 };
684 
685 } // anonymous namespace
686 
687 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
688   CodeGenFunction::RunCleanupsScope Scope(CGF);
689   if (PrePostAction) {
690     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
691     Callback(CodeGen, CGF, *PrePostAction);
692   } else {
693     PrePostActionTy Action;
694     Callback(CodeGen, CGF, Action);
695   }
696 }
697 
698 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
699   return CGF.EmitLoadOfPointerLValue(
700       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
701       getThreadIDVariable()->getType()->castAs<PointerType>());
702 }
703 
704 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
705   if (!CGF.HaveInsertPoint())
706     return;
707   // 1.2.2 OpenMP Language Terminology
708   // Structured block - An executable statement with a single entry at the
709   // top and a single exit at the bottom.
710   // The point of exit cannot be a branch out of the structured block.
711   // longjmp() and throw() must not violate the entry/exit criteria.
712   CGF.EHStack.pushTerminate();
713   CodeGen(CGF);
714   CGF.EHStack.popTerminate();
715 }
716 
717 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
718     CodeGenFunction &CGF) {
719   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
720                             getThreadIDVariable()->getType(),
721                             AlignmentSource::Decl);
722 }
723 
724 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
725     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
726   IdentTy = llvm::StructType::create(
727       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
728       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
729       CGM.Int8PtrTy /* psource */, nullptr);
730   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
731 
732   loadOffloadInfoMetadata();
733 }
734 
/// Discards every entry currently recorded in InternalVars.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}
738 
739 static llvm::Function *
740 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
741                           const Expr *CombinerInitializer, const VarDecl *In,
742                           const VarDecl *Out, bool IsCombiner) {
743   // void .omp_combiner.(Ty *in, Ty *out);
744   auto &C = CGM.getContext();
745   QualType PtrTy = C.getPointerType(Ty).withRestrict();
746   FunctionArgList Args;
747   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
748                                /*Id=*/nullptr, PtrTy);
749   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
750                               /*Id=*/nullptr, PtrTy);
751   Args.push_back(&OmpOutParm);
752   Args.push_back(&OmpInParm);
753   auto &FnInfo =
754       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
755   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
756   auto *Fn = llvm::Function::Create(
757       FnTy, llvm::GlobalValue::InternalLinkage,
758       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
759   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
760   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
761   CodeGenFunction CGF(CGM);
762   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
763   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
764   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
765   CodeGenFunction::OMPPrivateScope Scope(CGF);
766   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
767   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
768     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
769         .getAddress();
770   });
771   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
772   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
773     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
774         .getAddress();
775   });
776   (void)Scope.Privatize();
777   CGF.EmitIgnoredExpr(CombinerInitializer);
778   Scope.ForceCleanup();
779   CGF.FinishFunction();
780   return Fn;
781 }
782 
783 void CGOpenMPRuntime::emitUserDefinedReduction(
784     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
785   if (UDRMap.count(D) > 0)
786     return;
787   auto &C = CGM.getContext();
788   if (!In || !Out) {
789     In = &C.Idents.get("omp_in");
790     Out = &C.Idents.get("omp_out");
791   }
792   llvm::Function *Combiner = emitCombinerOrInitializer(
793       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
794       cast<VarDecl>(D->lookup(Out).front()),
795       /*IsCombiner=*/true);
796   llvm::Function *Initializer = nullptr;
797   if (auto *Init = D->getInitializer()) {
798     if (!Priv || !Orig) {
799       Priv = &C.Idents.get("omp_priv");
800       Orig = &C.Idents.get("omp_orig");
801     }
802     Initializer = emitCombinerOrInitializer(
803         CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
804         cast<VarDecl>(D->lookup(Priv).front()),
805         /*IsCombiner=*/false);
806   }
807   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
808   if (CGF) {
809     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
810     Decls.second.push_back(D);
811   }
812 }
813 
814 std::pair<llvm::Function *, llvm::Function *>
815 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
816   auto I = UDRMap.find(D);
817   if (I != UDRMap.end())
818     return I->second;
819   emitUserDefinedReduction(/*CGF=*/nullptr, D);
820   return UDRMap.lookup(D);
821 }
822 
823 // Layout information for ident_t.
824 static CharUnits getIdentAlign(CodeGenModule &CGM) {
825   return CGM.getPointerAlign();
826 }
827 static CharUnits getIdentSize(CodeGenModule &CGM) {
828   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
829   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
830 }
831 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
832   // All the fields except the last are i32, so this works beautifully.
833   return unsigned(Field) * CharUnits::fromQuantity(4);
834 }
835 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
836                                    IdentFieldIndex Field,
837                                    const llvm::Twine &Name = "") {
838   auto Offset = getOffsetOfIdentField(Field);
839   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
840 }
841 
842 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
843     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
844     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
845   assert(ThreadIDVar->getType()->isPointerType() &&
846          "thread id variable must be of type kmp_int32 *");
847   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
848   CodeGenFunction CGF(CGM, true);
849   bool HasCancel = false;
850   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
851     HasCancel = OPD->hasCancel();
852   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
853     HasCancel = OPSD->hasCancel();
854   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
855     HasCancel = OPFD->hasCancel();
856   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
857                                     HasCancel);
858   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
859   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
860 }
861 
862 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
863     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
864     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
865     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
866     bool Tied, unsigned &NumberOfParts) {
867   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
868                                               PrePostActionTy &) {
869     auto *ThreadID = getThreadID(CGF, D.getLocStart());
870     auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
871     llvm::Value *TaskArgs[] = {
872         UpLoc, ThreadID,
873         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
874                                     TaskTVar->getType()->castAs<PointerType>())
875             .getPointer()};
876     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
877   };
878   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
879                                                             UntiedCodeGen);
880   CodeGen.setAction(Action);
881   assert(!ThreadIDVar->getType()->isPointerType() &&
882          "thread id variable must be of type kmp_int32 for tasks");
883   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
884   auto *TD = dyn_cast<OMPTaskDirective>(&D);
885   CodeGenFunction CGF(CGM, true);
886   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
887                                         InnermostKind,
888                                         TD ? TD->hasCancel() : false, Action);
889   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
890   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
891   if (!Tied)
892     NumberOfParts = Action.getNumberOfParts();
893   return Res;
894 }
895 
896 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
897   CharUnits Align = getIdentAlign(CGM);
898   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
899   if (!Entry) {
900     if (!DefaultOpenMPPSource) {
901       // Initialize default location for psource field of ident_t structure of
902       // all ident_t objects. Format is ";file;function;line;column;;".
903       // Taken from
904       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
905       DefaultOpenMPPSource =
906           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
907       DefaultOpenMPPSource =
908           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
909     }
910 
911     ConstantInitBuilder builder(CGM);
912     auto fields = builder.beginStruct(IdentTy);
913     fields.addInt(CGM.Int32Ty, 0);
914     fields.addInt(CGM.Int32Ty, Flags);
915     fields.addInt(CGM.Int32Ty, 0);
916     fields.addInt(CGM.Int32Ty, 0);
917     fields.add(DefaultOpenMPPSource);
918     auto DefaultOpenMPLocation =
919       fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
920                                    llvm::GlobalValue::PrivateLinkage);
921     DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
922 
923     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
924   }
925   return Address(Entry, Align);
926 }
927 
928 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
929                                                  SourceLocation Loc,
930                                                  unsigned Flags) {
931   Flags |= OMP_IDENT_KMPC;
932   // If no debug info is generated - return global default location.
933   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
934       Loc.isInvalid())
935     return getOrCreateDefaultLocation(Flags).getPointer();
936 
937   assert(CGF.CurFn && "No function in current CodeGenFunction.");
938 
939   Address LocValue = Address::invalid();
940   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
941   if (I != OpenMPLocThreadIDMap.end())
942     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
943 
944   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
945   // GetOpenMPThreadID was called before this routine.
946   if (!LocValue.isValid()) {
947     // Generate "ident_t .kmpc_loc.addr;"
948     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
949                                       ".kmpc_loc.addr");
950     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
951     Elem.second.DebugLoc = AI.getPointer();
952     LocValue = AI;
953 
954     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
955     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
956     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
957                              CGM.getSize(getIdentSize(CGF.CGM)));
958   }
959 
960   // char **psource = &.kmpc_loc_<flags>.addr.psource;
961   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
962 
963   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
964   if (OMPDebugLoc == nullptr) {
965     SmallString<128> Buffer2;
966     llvm::raw_svector_ostream OS2(Buffer2);
967     // Build debug location
968     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
969     OS2 << ";" << PLoc.getFilename() << ";";
970     if (const FunctionDecl *FD =
971             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
972       OS2 << FD->getQualifiedNameAsString();
973     }
974     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
975     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
976     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
977   }
978   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
979   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
980 
981   // Our callers always pass this to a runtime function, so for
982   // convenience, go ahead and return a naked pointer.
983   return LocValue.getPointer();
984 }
985 
986 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
987                                           SourceLocation Loc) {
988   assert(CGF.CurFn && "No function in current CodeGenFunction.");
989 
990   llvm::Value *ThreadID = nullptr;
991   // Check whether we've already cached a load of the thread id in this
992   // function.
993   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
994   if (I != OpenMPLocThreadIDMap.end()) {
995     ThreadID = I->second.ThreadID;
996     if (ThreadID != nullptr)
997       return ThreadID;
998   }
999   if (auto *OMPRegionInfo =
1000           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1001     if (OMPRegionInfo->getThreadIDVariable()) {
1002       // Check if this an outlined function with thread id passed as argument.
1003       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1004       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1005       // If value loaded in entry block, cache it and use it everywhere in
1006       // function.
1007       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1008         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1009         Elem.second.ThreadID = ThreadID;
1010       }
1011       return ThreadID;
1012     }
1013   }
1014 
1015   // This is not an outlined function region - need to call __kmpc_int32
1016   // kmpc_global_thread_num(ident_t *loc).
1017   // Generate thread id value and cache this value for use across the
1018   // function.
1019   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1020   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1021   ThreadID =
1022       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1023                           emitUpdateLocation(CGF, Loc));
1024   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1025   Elem.second.ThreadID = ThreadID;
1026   return ThreadID;
1027 }
1028 
1029 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1030   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1031   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1032     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1033   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1034     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1035       UDRMap.erase(D);
1036     }
1037     FunctionUDRMap.erase(CGF.CurFn);
1038   }
1039 }
1040 
1041 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1042   if (!IdentTy) {
1043   }
1044   return llvm::PointerType::getUnqual(IdentTy);
1045 }
1046 
1047 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1048   if (!Kmpc_MicroTy) {
1049     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1050     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1051                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1052     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1053   }
1054   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1055 }
1056 
1057 llvm::Constant *
1058 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1059   llvm::Constant *RTLFn = nullptr;
1060   switch (static_cast<OpenMPRTLFunction>(Function)) {
1061   case OMPRTL__kmpc_fork_call: {
1062     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1063     // microtask, ...);
1064     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1065                                 getKmpc_MicroPointerTy()};
1066     llvm::FunctionType *FnTy =
1067         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1068     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1069     break;
1070   }
1071   case OMPRTL__kmpc_global_thread_num: {
1072     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1073     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1074     llvm::FunctionType *FnTy =
1075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1076     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1077     break;
1078   }
1079   case OMPRTL__kmpc_threadprivate_cached: {
1080     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1081     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1082     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1083                                 CGM.VoidPtrTy, CGM.SizeTy,
1084                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1085     llvm::FunctionType *FnTy =
1086         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1087     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1088     break;
1089   }
1090   case OMPRTL__kmpc_critical: {
1091     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1092     // kmp_critical_name *crit);
1093     llvm::Type *TypeParams[] = {
1094         getIdentTyPointerTy(), CGM.Int32Ty,
1095         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1096     llvm::FunctionType *FnTy =
1097         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1098     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1099     break;
1100   }
1101   case OMPRTL__kmpc_critical_with_hint: {
1102     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1103     // kmp_critical_name *crit, uintptr_t hint);
1104     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1105                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1106                                 CGM.IntPtrTy};
1107     llvm::FunctionType *FnTy =
1108         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1109     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1110     break;
1111   }
1112   case OMPRTL__kmpc_threadprivate_register: {
1113     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1114     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1115     // typedef void *(*kmpc_ctor)(void *);
1116     auto KmpcCtorTy =
1117         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1118                                 /*isVarArg*/ false)->getPointerTo();
1119     // typedef void *(*kmpc_cctor)(void *, void *);
1120     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1121     auto KmpcCopyCtorTy =
1122         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1123                                 /*isVarArg*/ false)->getPointerTo();
1124     // typedef void (*kmpc_dtor)(void *);
1125     auto KmpcDtorTy =
1126         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1127             ->getPointerTo();
1128     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1129                               KmpcCopyCtorTy, KmpcDtorTy};
1130     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1131                                         /*isVarArg*/ false);
1132     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1133     break;
1134   }
1135   case OMPRTL__kmpc_end_critical: {
1136     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1137     // kmp_critical_name *crit);
1138     llvm::Type *TypeParams[] = {
1139         getIdentTyPointerTy(), CGM.Int32Ty,
1140         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1141     llvm::FunctionType *FnTy =
1142         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1143     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1144     break;
1145   }
1146   case OMPRTL__kmpc_cancel_barrier: {
1147     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1148     // global_tid);
1149     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1150     llvm::FunctionType *FnTy =
1151         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1152     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1153     break;
1154   }
1155   case OMPRTL__kmpc_barrier: {
1156     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1157     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1158     llvm::FunctionType *FnTy =
1159         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1160     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1161     break;
1162   }
1163   case OMPRTL__kmpc_for_static_fini: {
1164     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1165     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1166     llvm::FunctionType *FnTy =
1167         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1168     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1169     break;
1170   }
1171   case OMPRTL__kmpc_push_num_threads: {
1172     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1173     // kmp_int32 num_threads)
1174     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1175                                 CGM.Int32Ty};
1176     llvm::FunctionType *FnTy =
1177         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1178     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1179     break;
1180   }
1181   case OMPRTL__kmpc_serialized_parallel: {
1182     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1183     // global_tid);
1184     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1185     llvm::FunctionType *FnTy =
1186         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1187     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1188     break;
1189   }
1190   case OMPRTL__kmpc_end_serialized_parallel: {
1191     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1192     // global_tid);
1193     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1194     llvm::FunctionType *FnTy =
1195         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1196     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1197     break;
1198   }
1199   case OMPRTL__kmpc_flush: {
1200     // Build void __kmpc_flush(ident_t *loc);
1201     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1202     llvm::FunctionType *FnTy =
1203         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1204     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1205     break;
1206   }
1207   case OMPRTL__kmpc_master: {
1208     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1209     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1210     llvm::FunctionType *FnTy =
1211         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1212     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1213     break;
1214   }
1215   case OMPRTL__kmpc_end_master: {
1216     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1217     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1218     llvm::FunctionType *FnTy =
1219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1220     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1221     break;
1222   }
1223   case OMPRTL__kmpc_omp_taskyield: {
1224     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1225     // int end_part);
1226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1227     llvm::FunctionType *FnTy =
1228         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1229     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1230     break;
1231   }
1232   case OMPRTL__kmpc_single: {
1233     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1234     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1235     llvm::FunctionType *FnTy =
1236         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1237     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1238     break;
1239   }
1240   case OMPRTL__kmpc_end_single: {
1241     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1242     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1243     llvm::FunctionType *FnTy =
1244         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1245     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1246     break;
1247   }
1248   case OMPRTL__kmpc_omp_task_alloc: {
1249     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1250     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1251     // kmp_routine_entry_t *task_entry);
1252     assert(KmpRoutineEntryPtrTy != nullptr &&
1253            "Type kmp_routine_entry_t must be created.");
1254     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1255                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1256     // Return void * and then cast to particular kmp_task_t type.
1257     llvm::FunctionType *FnTy =
1258         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1259     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1260     break;
1261   }
1262   case OMPRTL__kmpc_omp_task: {
1263     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1264     // *new_task);
1265     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1266                                 CGM.VoidPtrTy};
1267     llvm::FunctionType *FnTy =
1268         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1269     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1270     break;
1271   }
1272   case OMPRTL__kmpc_copyprivate: {
1273     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1274     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1275     // kmp_int32 didit);
1276     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1277     auto *CpyFnTy =
1278         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1279     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1280                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1281                                 CGM.Int32Ty};
1282     llvm::FunctionType *FnTy =
1283         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1284     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1285     break;
1286   }
1287   case OMPRTL__kmpc_reduce: {
1288     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1289     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1290     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1291     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1292     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1293                                                /*isVarArg=*/false);
1294     llvm::Type *TypeParams[] = {
1295         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1296         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1297         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1298     llvm::FunctionType *FnTy =
1299         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1300     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1301     break;
1302   }
1303   case OMPRTL__kmpc_reduce_nowait: {
1304     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1305     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1306     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1307     // *lck);
1308     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1309     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1310                                                /*isVarArg=*/false);
1311     llvm::Type *TypeParams[] = {
1312         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1313         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1314         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1315     llvm::FunctionType *FnTy =
1316         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1317     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1318     break;
1319   }
1320   case OMPRTL__kmpc_end_reduce: {
1321     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1322     // kmp_critical_name *lck);
1323     llvm::Type *TypeParams[] = {
1324         getIdentTyPointerTy(), CGM.Int32Ty,
1325         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1326     llvm::FunctionType *FnTy =
1327         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1328     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1329     break;
1330   }
1331   case OMPRTL__kmpc_end_reduce_nowait: {
1332     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1333     // kmp_critical_name *lck);
1334     llvm::Type *TypeParams[] = {
1335         getIdentTyPointerTy(), CGM.Int32Ty,
1336         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1337     llvm::FunctionType *FnTy =
1338         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1339     RTLFn =
1340         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1341     break;
1342   }
1343   case OMPRTL__kmpc_omp_task_begin_if0: {
1344     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1345     // *new_task);
1346     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1347                                 CGM.VoidPtrTy};
1348     llvm::FunctionType *FnTy =
1349         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1350     RTLFn =
1351         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1352     break;
1353   }
1354   case OMPRTL__kmpc_omp_task_complete_if0: {
1355     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1356     // *new_task);
1357     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1358                                 CGM.VoidPtrTy};
1359     llvm::FunctionType *FnTy =
1360         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1361     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1362                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1363     break;
1364   }
1365   case OMPRTL__kmpc_ordered: {
1366     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1367     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1368     llvm::FunctionType *FnTy =
1369         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1370     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1371     break;
1372   }
1373   case OMPRTL__kmpc_end_ordered: {
1374     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1375     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1376     llvm::FunctionType *FnTy =
1377         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1378     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1379     break;
1380   }
1381   case OMPRTL__kmpc_omp_taskwait: {
1382     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1383     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1384     llvm::FunctionType *FnTy =
1385         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1386     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1387     break;
1388   }
1389   case OMPRTL__kmpc_taskgroup: {
1390     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1391     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1392     llvm::FunctionType *FnTy =
1393         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1394     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1395     break;
1396   }
1397   case OMPRTL__kmpc_end_taskgroup: {
1398     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1399     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1400     llvm::FunctionType *FnTy =
1401         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1402     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1403     break;
1404   }
1405   case OMPRTL__kmpc_push_proc_bind: {
1406     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1407     // int proc_bind)
1408     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1409     llvm::FunctionType *FnTy =
1410         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1411     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1412     break;
1413   }
1414   case OMPRTL__kmpc_omp_task_with_deps: {
1415     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1416     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1417     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1418     llvm::Type *TypeParams[] = {
1419         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1420         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1421     llvm::FunctionType *FnTy =
1422         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1423     RTLFn =
1424         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1425     break;
1426   }
1427   case OMPRTL__kmpc_omp_wait_deps: {
1428     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1429     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1430     // kmp_depend_info_t *noalias_dep_list);
1431     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1432                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1433                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1434     llvm::FunctionType *FnTy =
1435         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1436     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1437     break;
1438   }
1439   case OMPRTL__kmpc_cancellationpoint: {
1440     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1441     // global_tid, kmp_int32 cncl_kind)
1442     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1443     llvm::FunctionType *FnTy =
1444         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1445     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1446     break;
1447   }
1448   case OMPRTL__kmpc_cancel: {
1449     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1450     // kmp_int32 cncl_kind)
1451     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1452     llvm::FunctionType *FnTy =
1453         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1454     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1455     break;
1456   }
1457   case OMPRTL__kmpc_push_num_teams: {
1458     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
1459     // kmp_int32 num_teams, kmp_int32 num_threads)
1460     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1461         CGM.Int32Ty};
1462     llvm::FunctionType *FnTy =
1463         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1464     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1465     break;
1466   }
1467   case OMPRTL__kmpc_fork_teams: {
1468     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1469     // microtask, ...);
1470     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1471                                 getKmpc_MicroPointerTy()};
1472     llvm::FunctionType *FnTy =
1473         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1474     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1475     break;
1476   }
1477   case OMPRTL__kmpc_taskloop: {
1478     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1479     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1480     // sched, kmp_uint64 grainsize, void *task_dup);
1481     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1482                                 CGM.IntTy,
1483                                 CGM.VoidPtrTy,
1484                                 CGM.IntTy,
1485                                 CGM.Int64Ty->getPointerTo(),
1486                                 CGM.Int64Ty->getPointerTo(),
1487                                 CGM.Int64Ty,
1488                                 CGM.IntTy,
1489                                 CGM.IntTy,
1490                                 CGM.Int64Ty,
1491                                 CGM.VoidPtrTy};
1492     llvm::FunctionType *FnTy =
1493         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1494     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1495     break;
1496   }
1497   case OMPRTL__kmpc_doacross_init: {
1498     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1499     // num_dims, struct kmp_dim *dims);
1500     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1501                                 CGM.Int32Ty,
1502                                 CGM.Int32Ty,
1503                                 CGM.VoidPtrTy};
1504     llvm::FunctionType *FnTy =
1505         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1506     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1507     break;
1508   }
1509   case OMPRTL__kmpc_doacross_fini: {
1510     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1511     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1512     llvm::FunctionType *FnTy =
1513         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1514     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1515     break;
1516   }
1517   case OMPRTL__kmpc_doacross_post: {
1518     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1519     // *vec);
1520     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1521                                 CGM.Int64Ty->getPointerTo()};
1522     llvm::FunctionType *FnTy =
1523         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1524     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1525     break;
1526   }
1527   case OMPRTL__kmpc_doacross_wait: {
1528     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1529     // *vec);
1530     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1531                                 CGM.Int64Ty->getPointerTo()};
1532     llvm::FunctionType *FnTy =
1533         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1534     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1535     break;
1536   }
1537   case OMPRTL__tgt_target: {
1538     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1539     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1540     // *arg_types);
1541     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1542                                 CGM.VoidPtrTy,
1543                                 CGM.Int32Ty,
1544                                 CGM.VoidPtrPtrTy,
1545                                 CGM.VoidPtrPtrTy,
1546                                 CGM.SizeTy->getPointerTo(),
1547                                 CGM.Int32Ty->getPointerTo()};
1548     llvm::FunctionType *FnTy =
1549         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1550     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1551     break;
1552   }
1553   case OMPRTL__tgt_target_teams: {
1554     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1555     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1556     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1557     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1558                                 CGM.VoidPtrTy,
1559                                 CGM.Int32Ty,
1560                                 CGM.VoidPtrPtrTy,
1561                                 CGM.VoidPtrPtrTy,
1562                                 CGM.SizeTy->getPointerTo(),
1563                                 CGM.Int32Ty->getPointerTo(),
1564                                 CGM.Int32Ty,
1565                                 CGM.Int32Ty};
1566     llvm::FunctionType *FnTy =
1567         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1568     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1569     break;
1570   }
1571   case OMPRTL__tgt_register_lib: {
1572     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1573     QualType ParamTy =
1574         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1575     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1576     llvm::FunctionType *FnTy =
1577         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1578     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1579     break;
1580   }
1581   case OMPRTL__tgt_unregister_lib: {
1582     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1583     QualType ParamTy =
1584         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1585     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1586     llvm::FunctionType *FnTy =
1587         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1588     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1589     break;
1590   }
1591   case OMPRTL__tgt_target_data_begin: {
1592     // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
1593     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1594     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1595                                 CGM.Int32Ty,
1596                                 CGM.VoidPtrPtrTy,
1597                                 CGM.VoidPtrPtrTy,
1598                                 CGM.SizeTy->getPointerTo(),
1599                                 CGM.Int32Ty->getPointerTo()};
1600     llvm::FunctionType *FnTy =
1601         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1602     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
1603     break;
1604   }
1605   case OMPRTL__tgt_target_data_end: {
1606     // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
1607     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1608     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1609                                 CGM.Int32Ty,
1610                                 CGM.VoidPtrPtrTy,
1611                                 CGM.VoidPtrPtrTy,
1612                                 CGM.SizeTy->getPointerTo(),
1613                                 CGM.Int32Ty->getPointerTo()};
1614     llvm::FunctionType *FnTy =
1615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
1617     break;
1618   }
1619   case OMPRTL__tgt_target_data_update: {
1620     // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
1621     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1622     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1623                                 CGM.Int32Ty,
1624                                 CGM.VoidPtrPtrTy,
1625                                 CGM.VoidPtrPtrTy,
1626                                 CGM.SizeTy->getPointerTo(),
1627                                 CGM.Int32Ty->getPointerTo()};
1628     llvm::FunctionType *FnTy =
1629         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1630     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
1631     break;
1632   }
1633   }
1634   assert(RTLFn && "Unable to find OpenMP runtime function");
1635   return RTLFn;
1636 }
1637 
1638 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1639                                                              bool IVSigned) {
1640   assert((IVSize == 32 || IVSize == 64) &&
1641          "IV size is not compatible with the omp runtime");
1642   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1643                                        : "__kmpc_for_static_init_4u")
1644                            : (IVSigned ? "__kmpc_for_static_init_8"
1645                                        : "__kmpc_for_static_init_8u");
1646   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1647   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1648   llvm::Type *TypeParams[] = {
1649     getIdentTyPointerTy(),                     // loc
1650     CGM.Int32Ty,                               // tid
1651     CGM.Int32Ty,                               // schedtype
1652     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1653     PtrTy,                                     // p_lower
1654     PtrTy,                                     // p_upper
1655     PtrTy,                                     // p_stride
1656     ITy,                                       // incr
1657     ITy                                        // chunk
1658   };
1659   llvm::FunctionType *FnTy =
1660       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1661   return CGM.CreateRuntimeFunction(FnTy, Name);
1662 }
1663 
1664 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1665                                                             bool IVSigned) {
1666   assert((IVSize == 32 || IVSize == 64) &&
1667          "IV size is not compatible with the omp runtime");
1668   auto Name =
1669       IVSize == 32
1670           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1671           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1672   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1673   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1674                                CGM.Int32Ty,           // tid
1675                                CGM.Int32Ty,           // schedtype
1676                                ITy,                   // lower
1677                                ITy,                   // upper
1678                                ITy,                   // stride
1679                                ITy                    // chunk
1680   };
1681   llvm::FunctionType *FnTy =
1682       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1683   return CGM.CreateRuntimeFunction(FnTy, Name);
1684 }
1685 
1686 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1687                                                             bool IVSigned) {
1688   assert((IVSize == 32 || IVSize == 64) &&
1689          "IV size is not compatible with the omp runtime");
1690   auto Name =
1691       IVSize == 32
1692           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1693           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1694   llvm::Type *TypeParams[] = {
1695       getIdentTyPointerTy(), // loc
1696       CGM.Int32Ty,           // tid
1697   };
1698   llvm::FunctionType *FnTy =
1699       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1700   return CGM.CreateRuntimeFunction(FnTy, Name);
1701 }
1702 
1703 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1704                                                             bool IVSigned) {
1705   assert((IVSize == 32 || IVSize == 64) &&
1706          "IV size is not compatible with the omp runtime");
1707   auto Name =
1708       IVSize == 32
1709           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1710           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1711   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1712   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1713   llvm::Type *TypeParams[] = {
1714     getIdentTyPointerTy(),                     // loc
1715     CGM.Int32Ty,                               // tid
1716     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1717     PtrTy,                                     // p_lower
1718     PtrTy,                                     // p_upper
1719     PtrTy                                      // p_stride
1720   };
1721   llvm::FunctionType *FnTy =
1722       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1723   return CGM.CreateRuntimeFunction(FnTy, Name);
1724 }
1725 
1726 llvm::Constant *
1727 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1728   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1729          !CGM.getContext().getTargetInfo().isTLSSupported());
1730   // Lookup the entry, lazily creating it if necessary.
1731   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1732                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1733 }
1734 
1735 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1736                                                 const VarDecl *VD,
1737                                                 Address VDAddr,
1738                                                 SourceLocation Loc) {
1739   if (CGM.getLangOpts().OpenMPUseTLS &&
1740       CGM.getContext().getTargetInfo().isTLSSupported())
1741     return VDAddr;
1742 
1743   auto VarTy = VDAddr.getElementType();
1744   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1745                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1746                                                        CGM.Int8PtrTy),
1747                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1748                          getOrCreateThreadPrivateCache(VD)};
1749   return Address(CGF.EmitRuntimeCall(
1750       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1751                  VDAddr.getAlignment());
1752 }
1753 
1754 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1755     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1756     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1757   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1758   // library.
1759   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1760   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1761                       OMPLoc);
1762   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1763   // to register constructor/destructor for variable.
1764   llvm::Value *Args[] = {OMPLoc,
1765                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1766                                                        CGM.VoidPtrTy),
1767                          Ctor, CopyCtor, Dtor};
1768   CGF.EmitRuntimeCall(
1769       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1770 }
1771 
1772 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1773     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1774     bool PerformInit, CodeGenFunction *CGF) {
1775   if (CGM.getLangOpts().OpenMPUseTLS &&
1776       CGM.getContext().getTargetInfo().isTLSSupported())
1777     return nullptr;
1778 
1779   VD = VD->getDefinition(CGM.getContext());
1780   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1781     ThreadPrivateWithDefinition.insert(VD);
1782     QualType ASTTy = VD->getType();
1783 
1784     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1785     auto Init = VD->getAnyInitializer();
1786     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1787       // Generate function that re-emits the declaration's initializer into the
1788       // threadprivate copy of the variable VD
1789       CodeGenFunction CtorCGF(CGM);
1790       FunctionArgList Args;
1791       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1792                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1793       Args.push_back(&Dst);
1794 
1795       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1796           CGM.getContext().VoidPtrTy, Args);
1797       auto FTy = CGM.getTypes().GetFunctionType(FI);
1798       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1799           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1800       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1801                             Args, SourceLocation());
1802       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1803           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1804           CGM.getContext().VoidPtrTy, Dst.getLocation());
1805       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1806       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1807                                              CtorCGF.ConvertTypeForMem(ASTTy));
1808       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1809                                /*IsInitializer=*/true);
1810       ArgVal = CtorCGF.EmitLoadOfScalar(
1811           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1812           CGM.getContext().VoidPtrTy, Dst.getLocation());
1813       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1814       CtorCGF.FinishFunction();
1815       Ctor = Fn;
1816     }
1817     if (VD->getType().isDestructedType() != QualType::DK_none) {
1818       // Generate function that emits destructor call for the threadprivate copy
1819       // of the variable VD
1820       CodeGenFunction DtorCGF(CGM);
1821       FunctionArgList Args;
1822       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1823                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1824       Args.push_back(&Dst);
1825 
1826       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1827           CGM.getContext().VoidTy, Args);
1828       auto FTy = CGM.getTypes().GetFunctionType(FI);
1829       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1830           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1831       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1832       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1833                             SourceLocation());
1834       // Create a scope with an artificial location for the body of this function.
1835       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1836       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1837           DtorCGF.GetAddrOfLocalVar(&Dst),
1838           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1839       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1840                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1841                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1842       DtorCGF.FinishFunction();
1843       Dtor = Fn;
1844     }
1845     // Do not emit init function if it is not required.
1846     if (!Ctor && !Dtor)
1847       return nullptr;
1848 
1849     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1850     auto CopyCtorTy =
1851         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1852                                 /*isVarArg=*/false)->getPointerTo();
1853     // Copying constructor for the threadprivate variable.
1854     // Must be NULL - reserved by runtime, but currently it requires that this
1855     // parameter is always NULL. Otherwise it fires assertion.
1856     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1857     if (Ctor == nullptr) {
1858       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1859                                             /*isVarArg=*/false)->getPointerTo();
1860       Ctor = llvm::Constant::getNullValue(CtorTy);
1861     }
1862     if (Dtor == nullptr) {
1863       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1864                                             /*isVarArg=*/false)->getPointerTo();
1865       Dtor = llvm::Constant::getNullValue(DtorTy);
1866     }
1867     if (!CGF) {
1868       auto InitFunctionTy =
1869           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1870       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1871           InitFunctionTy, ".__omp_threadprivate_init_.",
1872           CGM.getTypes().arrangeNullaryFunction());
1873       CodeGenFunction InitCGF(CGM);
1874       FunctionArgList ArgList;
1875       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1876                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1877                             Loc);
1878       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1879       InitCGF.FinishFunction();
1880       return InitFunction;
1881     }
1882     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1883   }
1884   return nullptr;
1885 }
1886 
1887 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1888 /// function. Here is the logic:
1889 /// if (Cond) {
1890 ///   ThenGen();
1891 /// } else {
1892 ///   ElseGen();
1893 /// }
1894 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1895                             const RegionCodeGenTy &ThenGen,
1896                             const RegionCodeGenTy &ElseGen) {
1897   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1898 
1899   // If the condition constant folds and can be elided, try to avoid emitting
1900   // the condition and the dead arm of the if/else.
1901   bool CondConstant;
1902   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1903     if (CondConstant)
1904       ThenGen(CGF);
1905     else
1906       ElseGen(CGF);
1907     return;
1908   }
1909 
1910   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1911   // emit the conditional branch.
1912   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1913   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1914   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1915   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1916 
1917   // Emit the 'then' code.
1918   CGF.EmitBlock(ThenBlock);
1919   ThenGen(CGF);
1920   CGF.EmitBranch(ContBlock);
1921   // Emit the 'else' code if present.
1922   // There is no need to emit line number for unconditional branch.
1923   (void)ApplyDebugLocation::CreateEmpty(CGF);
1924   CGF.EmitBlock(ElseBlock);
1925   ElseGen(CGF);
1926   // There is no need to emit line number for unconditional branch.
1927   (void)ApplyDebugLocation::CreateEmpty(CGF);
1928   CGF.EmitBranch(ContBlock);
1929   // Emit the continuation block for code after the if.
1930   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1931 }
1932 
1933 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1934                                        llvm::Value *OutlinedFn,
1935                                        ArrayRef<llvm::Value *> CapturedVars,
1936                                        const Expr *IfCond) {
1937   if (!CGF.HaveInsertPoint())
1938     return;
1939   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1940   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1941                                                      PrePostActionTy &) {
1942     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1943     auto &RT = CGF.CGM.getOpenMPRuntime();
1944     llvm::Value *Args[] = {
1945         RTLoc,
1946         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1947         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1948     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1949     RealArgs.append(std::begin(Args), std::end(Args));
1950     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1951 
1952     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1953     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1954   };
1955   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1956                                                           PrePostActionTy &) {
1957     auto &RT = CGF.CGM.getOpenMPRuntime();
1958     auto ThreadID = RT.getThreadID(CGF, Loc);
1959     // Build calls:
1960     // __kmpc_serialized_parallel(&Loc, GTid);
1961     llvm::Value *Args[] = {RTLoc, ThreadID};
1962     CGF.EmitRuntimeCall(
1963         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1964 
1965     // OutlinedFn(&GTid, &zero, CapturedStruct);
1966     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1967     Address ZeroAddr =
1968         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1969                              /*Name*/ ".zero.addr");
1970     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1971     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1972     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1973     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1974     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1975     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1976 
1977     // __kmpc_end_serialized_parallel(&Loc, GTid);
1978     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1979     CGF.EmitRuntimeCall(
1980         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1981         EndArgs);
1982   };
1983   if (IfCond)
1984     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1985   else {
1986     RegionCodeGenTy ThenRCG(ThenGen);
1987     ThenRCG(CGF);
1988   }
1989 }
1990 
1991 // If we're inside an (outlined) parallel region, use the region info's
1992 // thread-ID variable (it is passed in a first argument of the outlined function
1993 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1994 // regular serial code region, get thread ID by calling kmp_int32
1995 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1996 // return the address of that temp.
1997 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1998                                              SourceLocation Loc) {
1999   if (auto *OMPRegionInfo =
2000           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2001     if (OMPRegionInfo->getThreadIDVariable())
2002       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2003 
2004   auto ThreadID = getThreadID(CGF, Loc);
2005   auto Int32Ty =
2006       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2007   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2008   CGF.EmitStoreOfScalar(ThreadID,
2009                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2010 
2011   return ThreadIDTemp;
2012 }
2013 
2014 llvm::Constant *
2015 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2016                                              const llvm::Twine &Name) {
2017   SmallString<256> Buffer;
2018   llvm::raw_svector_ostream Out(Buffer);
2019   Out << Name;
2020   auto RuntimeName = Out.str();
2021   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2022   if (Elem.second) {
2023     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2024            "OMP internal variable has different type than requested");
2025     return &*Elem.second;
2026   }
2027 
2028   return Elem.second = new llvm::GlobalVariable(
2029              CGM.getModule(), Ty, /*IsConstant*/ false,
2030              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2031              Elem.first());
2032 }
2033 
2034 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2035   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2036   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2037 }
2038 
2039 namespace {
2040 /// Common pre(post)-action for different OpenMP constructs.
2041 class CommonActionTy final : public PrePostActionTy {
2042   llvm::Value *EnterCallee;
2043   ArrayRef<llvm::Value *> EnterArgs;
2044   llvm::Value *ExitCallee;
2045   ArrayRef<llvm::Value *> ExitArgs;
2046   bool Conditional;
2047   llvm::BasicBlock *ContBlock = nullptr;
2048 
2049 public:
2050   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2051                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2052                  bool Conditional = false)
2053       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2054         ExitArgs(ExitArgs), Conditional(Conditional) {}
2055   void Enter(CodeGenFunction &CGF) override {
2056     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2057     if (Conditional) {
2058       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2059       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060       ContBlock = CGF.createBasicBlock("omp_if.end");
2061       // Generate the branch (If-stmt)
2062       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2063       CGF.EmitBlock(ThenBlock);
2064     }
2065   }
2066   void Done(CodeGenFunction &CGF) {
2067     // Emit the rest of blocks/branches
2068     CGF.EmitBranch(ContBlock);
2069     CGF.EmitBlock(ContBlock, true);
2070   }
2071   void Exit(CodeGenFunction &CGF) override {
2072     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2073   }
2074 };
2075 } // anonymous namespace
2076 
2077 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2078                                          StringRef CriticalName,
2079                                          const RegionCodeGenTy &CriticalOpGen,
2080                                          SourceLocation Loc, const Expr *Hint) {
2081   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2082   // CriticalOpGen();
2083   // __kmpc_end_critical(ident_t *, gtid, Lock);
2084   // Prepare arguments and build a call to __kmpc_critical
2085   if (!CGF.HaveInsertPoint())
2086     return;
2087   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2088                          getCriticalRegionLock(CriticalName)};
2089   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2090                                                 std::end(Args));
2091   if (Hint) {
2092     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2093         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2094   }
2095   CommonActionTy Action(
2096       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2097                                  : OMPRTL__kmpc_critical),
2098       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2099   CriticalOpGen.setAction(Action);
2100   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2101 }
2102 
2103 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2104                                        const RegionCodeGenTy &MasterOpGen,
2105                                        SourceLocation Loc) {
2106   if (!CGF.HaveInsertPoint())
2107     return;
2108   // if(__kmpc_master(ident_t *, gtid)) {
2109   //   MasterOpGen();
2110   //   __kmpc_end_master(ident_t *, gtid);
2111   // }
2112   // Prepare arguments and build a call to __kmpc_master
2113   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2114   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2115                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2116                         /*Conditional=*/true);
2117   MasterOpGen.setAction(Action);
2118   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2119   Action.Done(CGF);
2120 }
2121 
2122 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2123                                         SourceLocation Loc) {
2124   if (!CGF.HaveInsertPoint())
2125     return;
2126   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2127   llvm::Value *Args[] = {
2128       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2129       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2130   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2131   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2132     Region->emitUntiedSwitch(CGF);
2133 }
2134 
2135 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2136                                           const RegionCodeGenTy &TaskgroupOpGen,
2137                                           SourceLocation Loc) {
2138   if (!CGF.HaveInsertPoint())
2139     return;
2140   // __kmpc_taskgroup(ident_t *, gtid);
2141   // TaskgroupOpGen();
2142   // __kmpc_end_taskgroup(ident_t *, gtid);
2143   // Prepare arguments and build a call to __kmpc_taskgroup
2144   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2145   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2146                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2147                         Args);
2148   TaskgroupOpGen.setAction(Action);
2149   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2150 }
2151 
2152 /// Given an array of pointers to variables, project the address of a
2153 /// given variable.
2154 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2155                                       unsigned Index, const VarDecl *Var) {
2156   // Pull out the pointer to the variable.
2157   Address PtrAddr =
2158       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2159   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2160 
2161   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2162   Addr = CGF.Builder.CreateElementBitCast(
2163       Addr, CGF.ConvertTypeForMem(Var->getType()));
2164   return Addr;
2165 }
2166 
2167 static llvm::Value *emitCopyprivateCopyFunction(
2168     CodeGenModule &CGM, llvm::Type *ArgsType,
2169     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2170     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2171   auto &C = CGM.getContext();
2172   // void copy_func(void *LHSArg, void *RHSArg);
2173   FunctionArgList Args;
2174   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2175                            C.VoidPtrTy);
2176   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2177                            C.VoidPtrTy);
2178   Args.push_back(&LHSArg);
2179   Args.push_back(&RHSArg);
2180   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2181   auto *Fn = llvm::Function::Create(
2182       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2183       ".omp.copyprivate.copy_func", &CGM.getModule());
2184   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2185   CodeGenFunction CGF(CGM);
2186   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2187   // Dest = (void*[n])(LHSArg);
2188   // Src = (void*[n])(RHSArg);
2189   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2190       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2191       ArgsType), CGF.getPointerAlign());
2192   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2193       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2194       ArgsType), CGF.getPointerAlign());
2195   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2196   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2197   // ...
2198   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2199   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2200     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2201     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2202 
2203     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2204     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2205 
2206     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2207     QualType Type = VD->getType();
2208     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2209   }
2210   CGF.FinishFunction();
2211   return Fn;
2212 }
2213 
2214 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2215                                        const RegionCodeGenTy &SingleOpGen,
2216                                        SourceLocation Loc,
2217                                        ArrayRef<const Expr *> CopyprivateVars,
2218                                        ArrayRef<const Expr *> SrcExprs,
2219                                        ArrayRef<const Expr *> DstExprs,
2220                                        ArrayRef<const Expr *> AssignmentOps) {
2221   if (!CGF.HaveInsertPoint())
2222     return;
2223   assert(CopyprivateVars.size() == SrcExprs.size() &&
2224          CopyprivateVars.size() == DstExprs.size() &&
2225          CopyprivateVars.size() == AssignmentOps.size());
2226   auto &C = CGM.getContext();
2227   // int32 did_it = 0;
2228   // if(__kmpc_single(ident_t *, gtid)) {
2229   //   SingleOpGen();
2230   //   __kmpc_end_single(ident_t *, gtid);
2231   //   did_it = 1;
2232   // }
2233   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2234   // <copy_func>, did_it);
2235 
2236   Address DidIt = Address::invalid();
2237   if (!CopyprivateVars.empty()) {
2238     // int32 did_it = 0;
2239     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2240     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2241     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2242   }
2243   // Prepare arguments and build a call to __kmpc_single
2244   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2245   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2246                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2247                         /*Conditional=*/true);
2248   SingleOpGen.setAction(Action);
2249   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2250   if (DidIt.isValid()) {
2251     // did_it = 1;
2252     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2253   }
2254   Action.Done(CGF);
2255   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2256   // <copy_func>, did_it);
2257   if (DidIt.isValid()) {
2258     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2259     auto CopyprivateArrayTy =
2260         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2261                                /*IndexTypeQuals=*/0);
2262     // Create a list of all private variables for copyprivate.
2263     Address CopyprivateList =
2264         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2265     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2266       Address Elem = CGF.Builder.CreateConstArrayGEP(
2267           CopyprivateList, I, CGF.getPointerSize());
2268       CGF.Builder.CreateStore(
2269           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2270               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2271           Elem);
2272     }
2273     // Build function that copies private values from single region to all other
2274     // threads in the corresponding parallel region.
2275     auto *CpyFn = emitCopyprivateCopyFunction(
2276         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2277         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2278     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2279     Address CL =
2280       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2281                                                       CGF.VoidPtrTy);
2282     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2283     llvm::Value *Args[] = {
2284         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2285         getThreadID(CGF, Loc),        // i32 <gtid>
2286         BufSize,                      // size_t <buf_size>
2287         CL.getPointer(),              // void *<copyprivate list>
2288         CpyFn,                        // void (*) (void *, void *) <copy_func>
2289         DidItVal                      // i32 did_it
2290     };
2291     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2292   }
2293 }
2294 
2295 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2296                                         const RegionCodeGenTy &OrderedOpGen,
2297                                         SourceLocation Loc, bool IsThreads) {
2298   if (!CGF.HaveInsertPoint())
2299     return;
2300   // __kmpc_ordered(ident_t *, gtid);
2301   // OrderedOpGen();
2302   // __kmpc_end_ordered(ident_t *, gtid);
2303   // Prepare arguments and build a call to __kmpc_ordered
2304   if (IsThreads) {
2305     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2306     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2307                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2308                           Args);
2309     OrderedOpGen.setAction(Action);
2310     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2311     return;
2312   }
2313   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2314 }
2315 
2316 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2317                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2318                                       bool ForceSimpleCall) {
2319   if (!CGF.HaveInsertPoint())
2320     return;
2321   // Build call __kmpc_cancel_barrier(loc, thread_id);
2322   // Build call __kmpc_barrier(loc, thread_id);
2323   unsigned Flags;
2324   if (Kind == OMPD_for)
2325     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2326   else if (Kind == OMPD_sections)
2327     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2328   else if (Kind == OMPD_single)
2329     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2330   else if (Kind == OMPD_barrier)
2331     Flags = OMP_IDENT_BARRIER_EXPL;
2332   else
2333     Flags = OMP_IDENT_BARRIER_IMPL;
2334   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2335   // thread_id);
2336   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2337                          getThreadID(CGF, Loc)};
2338   if (auto *OMPRegionInfo =
2339           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2340     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2341       auto *Result = CGF.EmitRuntimeCall(
2342           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2343       if (EmitChecks) {
2344         // if (__kmpc_cancel_barrier()) {
2345         //   exit from construct;
2346         // }
2347         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2348         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2349         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2350         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2351         CGF.EmitBlock(ExitBB);
2352         //   exit from construct;
2353         auto CancelDestination =
2354             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2355         CGF.EmitBranchThroughCleanup(CancelDestination);
2356         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2357       }
2358       return;
2359     }
2360   }
2361   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2362 }
2363 
2364 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2365 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2366                                           bool Chunked, bool Ordered) {
2367   switch (ScheduleKind) {
2368   case OMPC_SCHEDULE_static:
2369     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2370                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2371   case OMPC_SCHEDULE_dynamic:
2372     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2373   case OMPC_SCHEDULE_guided:
2374     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2375   case OMPC_SCHEDULE_runtime:
2376     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2377   case OMPC_SCHEDULE_auto:
2378     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2379   case OMPC_SCHEDULE_unknown:
2380     assert(!Chunked && "chunk was specified but schedule kind not known");
2381     return Ordered ? OMP_ord_static : OMP_sch_static;
2382   }
2383   llvm_unreachable("Unexpected runtime schedule");
2384 }
2385 
2386 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2387 static OpenMPSchedType
2388 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2389   // only static is allowed for dist_schedule
2390   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2391 }
2392 
2393 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2394                                          bool Chunked) const {
2395   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2396   return Schedule == OMP_sch_static;
2397 }
2398 
2399 bool CGOpenMPRuntime::isStaticNonchunked(
2400     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2401   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2402   return Schedule == OMP_dist_sch_static;
2403 }
2404 
2405 
2406 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2407   auto Schedule =
2408       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2409   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2410   return Schedule != OMP_sch_static;
2411 }
2412 
2413 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2414                                   OpenMPScheduleClauseModifier M1,
2415                                   OpenMPScheduleClauseModifier M2) {
2416   int Modifier = 0;
2417   switch (M1) {
2418   case OMPC_SCHEDULE_MODIFIER_monotonic:
2419     Modifier = OMP_sch_modifier_monotonic;
2420     break;
2421   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2422     Modifier = OMP_sch_modifier_nonmonotonic;
2423     break;
2424   case OMPC_SCHEDULE_MODIFIER_simd:
2425     if (Schedule == OMP_sch_static_chunked)
2426       Schedule = OMP_sch_static_balanced_chunked;
2427     break;
2428   case OMPC_SCHEDULE_MODIFIER_last:
2429   case OMPC_SCHEDULE_MODIFIER_unknown:
2430     break;
2431   }
2432   switch (M2) {
2433   case OMPC_SCHEDULE_MODIFIER_monotonic:
2434     Modifier = OMP_sch_modifier_monotonic;
2435     break;
2436   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2437     Modifier = OMP_sch_modifier_nonmonotonic;
2438     break;
2439   case OMPC_SCHEDULE_MODIFIER_simd:
2440     if (Schedule == OMP_sch_static_chunked)
2441       Schedule = OMP_sch_static_balanced_chunked;
2442     break;
2443   case OMPC_SCHEDULE_MODIFIER_last:
2444   case OMPC_SCHEDULE_MODIFIER_unknown:
2445     break;
2446   }
2447   return Schedule | Modifier;
2448 }
2449 
2450 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2451                                           SourceLocation Loc,
2452                                           const OpenMPScheduleTy &ScheduleKind,
2453                                           unsigned IVSize, bool IVSigned,
2454                                           bool Ordered, llvm::Value *UB,
2455                                           llvm::Value *Chunk) {
2456   if (!CGF.HaveInsertPoint())
2457     return;
2458   OpenMPSchedType Schedule =
2459       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2460   assert(Ordered ||
2461          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2462           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2463           Schedule != OMP_sch_static_balanced_chunked));
2464   // Call __kmpc_dispatch_init(
2465   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2466   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2467   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2468 
2469   // If the Chunk was not specified in the clause - use default value 1.
2470   if (Chunk == nullptr)
2471     Chunk = CGF.Builder.getIntN(IVSize, 1);
2472   llvm::Value *Args[] = {
2473       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2474       CGF.Builder.getInt32(addMonoNonMonoModifier(
2475           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2476       CGF.Builder.getIntN(IVSize, 0),                   // Lower
2477       UB,                                               // Upper
2478       CGF.Builder.getIntN(IVSize, 1),                   // Stride
2479       Chunk                                             // Chunk
2480   };
2481   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2482 }
2483 
2484 static void emitForStaticInitCall(
2485     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2486     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2487     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2488     unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2489     Address ST, llvm::Value *Chunk) {
2490   if (!CGF.HaveInsertPoint())
2491      return;
2492 
2493    assert(!Ordered);
2494    assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2495           Schedule == OMP_sch_static_balanced_chunked ||
2496           Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2497           Schedule == OMP_dist_sch_static ||
2498           Schedule == OMP_dist_sch_static_chunked);
2499 
2500    // Call __kmpc_for_static_init(
2501    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2502    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2503    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2504    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2505    if (Chunk == nullptr) {
2506      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2507              Schedule == OMP_dist_sch_static) &&
2508             "expected static non-chunked schedule");
2509      // If the Chunk was not specified in the clause - use default value 1.
2510        Chunk = CGF.Builder.getIntN(IVSize, 1);
2511    } else {
2512      assert((Schedule == OMP_sch_static_chunked ||
2513              Schedule == OMP_sch_static_balanced_chunked ||
2514              Schedule == OMP_ord_static_chunked ||
2515              Schedule == OMP_dist_sch_static_chunked) &&
2516             "expected static chunked schedule");
2517    }
2518    llvm::Value *Args[] = {
2519        UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2520                                      Schedule, M1, M2)), // Schedule type
2521        IL.getPointer(),                                  // &isLastIter
2522        LB.getPointer(),                                  // &LB
2523        UB.getPointer(),                                  // &UB
2524        ST.getPointer(),                                  // &Stride
2525        CGF.Builder.getIntN(IVSize, 1),                   // Incr
2526        Chunk                                             // Chunk
2527    };
2528    CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2529 }
2530 
2531 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2532                                         SourceLocation Loc,
2533                                         const OpenMPScheduleTy &ScheduleKind,
2534                                         unsigned IVSize, bool IVSigned,
2535                                         bool Ordered, Address IL, Address LB,
2536                                         Address UB, Address ST,
2537                                         llvm::Value *Chunk) {
2538   OpenMPSchedType ScheduleNum =
2539       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2540   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2541   auto *ThreadId = getThreadID(CGF, Loc);
2542   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2543   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2544                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
2545                         Ordered, IL, LB, UB, ST, Chunk);
2546 }
2547 
2548 void CGOpenMPRuntime::emitDistributeStaticInit(
2549     CodeGenFunction &CGF, SourceLocation Loc,
2550     OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
2551     bool Ordered, Address IL, Address LB, Address UB, Address ST,
2552     llvm::Value *Chunk) {
2553   OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2554   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2555   auto *ThreadId = getThreadID(CGF, Loc);
2556   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2557   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2558                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2559                         OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
2560                         UB, ST, Chunk);
2561 }
2562 
2563 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2564                                           SourceLocation Loc) {
2565   if (!CGF.HaveInsertPoint())
2566     return;
2567   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2568   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2569   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2570                       Args);
2571 }
2572 
2573 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2574                                                  SourceLocation Loc,
2575                                                  unsigned IVSize,
2576                                                  bool IVSigned) {
2577   if (!CGF.HaveInsertPoint())
2578     return;
2579   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2580   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2581   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2582 }
2583 
2584 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2585                                           SourceLocation Loc, unsigned IVSize,
2586                                           bool IVSigned, Address IL,
2587                                           Address LB, Address UB,
2588                                           Address ST) {
2589   // Call __kmpc_dispatch_next(
2590   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2591   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2592   //          kmp_int[32|64] *p_stride);
2593   llvm::Value *Args[] = {
2594       emitUpdateLocation(CGF, Loc),
2595       getThreadID(CGF, Loc),
2596       IL.getPointer(), // &isLastIter
2597       LB.getPointer(), // &Lower
2598       UB.getPointer(), // &Upper
2599       ST.getPointer()  // &Stride
2600   };
2601   llvm::Value *Call =
2602       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2603   return CGF.EmitScalarConversion(
2604       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2605       CGF.getContext().BoolTy, Loc);
2606 }
2607 
2608 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2609                                            llvm::Value *NumThreads,
2610                                            SourceLocation Loc) {
2611   if (!CGF.HaveInsertPoint())
2612     return;
2613   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2614   llvm::Value *Args[] = {
2615       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2616       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2617   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2618                       Args);
2619 }
2620 
2621 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2622                                          OpenMPProcBindClauseKind ProcBind,
2623                                          SourceLocation Loc) {
2624   if (!CGF.HaveInsertPoint())
2625     return;
2626   // Constants for proc bind value accepted by the runtime.
2627   enum ProcBindTy {
2628     ProcBindFalse = 0,
2629     ProcBindTrue,
2630     ProcBindMaster,
2631     ProcBindClose,
2632     ProcBindSpread,
2633     ProcBindIntel,
2634     ProcBindDefault
2635   } RuntimeProcBind;
2636   switch (ProcBind) {
2637   case OMPC_PROC_BIND_master:
2638     RuntimeProcBind = ProcBindMaster;
2639     break;
2640   case OMPC_PROC_BIND_close:
2641     RuntimeProcBind = ProcBindClose;
2642     break;
2643   case OMPC_PROC_BIND_spread:
2644     RuntimeProcBind = ProcBindSpread;
2645     break;
2646   case OMPC_PROC_BIND_unknown:
2647     llvm_unreachable("Unsupported proc_bind value.");
2648   }
2649   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2650   llvm::Value *Args[] = {
2651       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2652       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2653   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2654 }
2655 
2656 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2657                                 SourceLocation Loc) {
2658   if (!CGF.HaveInsertPoint())
2659     return;
2660   // Build call void __kmpc_flush(ident_t *loc)
2661   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2662                       emitUpdateLocation(CGF, Loc));
2663 }
2664 
namespace {
/// Field indexes for the kmp_task_t type. Enumerators are listed in field
/// order, starting at 0.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds = 0,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Is-last-iteration flag (taskloops only).
  KmpTaskTLastIter,
};
} // anonymous namespace
2688 
2689 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2690   // FIXME: Add other entries type when they become supported.
2691   return OffloadEntriesTargetRegion.empty();
2692 }
2693 
2694 /// \brief Initialize target region entry.
2695 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2696     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2697                                     StringRef ParentName, unsigned LineNum,
2698                                     unsigned Order) {
2699   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2700                                              "only required for the device "
2701                                              "code generation.");
2702   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2703       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2704   ++OffloadingEntriesNum;
2705 }
2706 
2707 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2708     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2709                                   StringRef ParentName, unsigned LineNum,
2710                                   llvm::Constant *Addr, llvm::Constant *ID) {
2711   // If we are emitting code for a target, the entry is already initialized,
2712   // only has to be registered.
2713   if (CGM.getLangOpts().OpenMPIsDevice) {
2714     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2715            "Entry must exist.");
2716     auto &Entry =
2717         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2718     assert(Entry.isValid() && "Entry not initialized!");
2719     Entry.setAddress(Addr);
2720     Entry.setID(ID);
2721     return;
2722   } else {
2723     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2724     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2725   }
2726 }
2727 
2728 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2729     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2730     unsigned LineNum) const {
2731   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2732   if (PerDevice == OffloadEntriesTargetRegion.end())
2733     return false;
2734   auto PerFile = PerDevice->second.find(FileID);
2735   if (PerFile == PerDevice->second.end())
2736     return false;
2737   auto PerParentName = PerFile->second.find(ParentName);
2738   if (PerParentName == PerFile->second.end())
2739     return false;
2740   auto PerLine = PerParentName->second.find(LineNum);
2741   if (PerLine == PerParentName->second.end())
2742     return false;
2743   // Fail if this entry is already registered.
2744   if (PerLine->second.getAddress() || PerLine->second.getID())
2745     return false;
2746   return true;
2747 }
2748 
2749 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2750     const OffloadTargetRegionEntryInfoActTy &Action) {
2751   // Scan all target region entries and perform the provided action.
2752   for (auto &D : OffloadEntriesTargetRegion)
2753     for (auto &F : D.second)
2754       for (auto &P : F.second)
2755         for (auto &L : P.second)
2756           Action(D.first, F.first, P.first(), L.first, L.second);
2757 }
2758 
2759 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2760 /// \a Codegen. This is used to emit the two functions that register and
2761 /// unregister the descriptor of the current compilation unit.
2762 static llvm::Function *
2763 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2764                                          const RegionCodeGenTy &Codegen) {
2765   auto &C = CGM.getContext();
2766   FunctionArgList Args;
2767   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2768                              /*Id=*/nullptr, C.VoidPtrTy);
2769   Args.push_back(&DummyPtr);
2770 
2771   CodeGenFunction CGF(CGM);
2772   GlobalDecl();
2773   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2774   auto FTy = CGM.getTypes().GetFunctionType(FI);
2775   auto *Fn =
2776       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2777   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2778   Codegen(CGF);
2779   CGF.FinishFunction();
2780   return Fn;
2781 }
2782 
2783 llvm::Function *
2784 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2785 
2786   // If we don't have entries or if we are emitting code for the device, we
2787   // don't need to do anything.
2788   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2789     return nullptr;
2790 
2791   auto &M = CGM.getModule();
2792   auto &C = CGM.getContext();
2793 
2794   // Get list of devices we care about
2795   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2796 
2797   // We should be creating an offloading descriptor only if there are devices
2798   // specified.
2799   assert(!Devices.empty() && "No OpenMP offloading devices??");
2800 
2801   // Create the external variables that will point to the begin and end of the
2802   // host entries section. These will be defined by the linker.
2803   auto *OffloadEntryTy =
2804       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2805   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2806       M, OffloadEntryTy, /*isConstant=*/true,
2807       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2808       ".omp_offloading.entries_begin");
2809   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2810       M, OffloadEntryTy, /*isConstant=*/true,
2811       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2812       ".omp_offloading.entries_end");
2813 
2814   // Create all device images
2815   auto *DeviceImageTy = cast<llvm::StructType>(
2816       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2817   ConstantInitBuilder DeviceImagesBuilder(CGM);
2818   auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
2819 
2820   for (unsigned i = 0; i < Devices.size(); ++i) {
2821     StringRef T = Devices[i].getTriple();
2822     auto *ImgBegin = new llvm::GlobalVariable(
2823         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2824         /*Initializer=*/nullptr,
2825         Twine(".omp_offloading.img_start.") + Twine(T));
2826     auto *ImgEnd = new llvm::GlobalVariable(
2827         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2828         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2829 
2830     auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
2831     Dev.add(ImgBegin);
2832     Dev.add(ImgEnd);
2833     Dev.add(HostEntriesBegin);
2834     Dev.add(HostEntriesEnd);
2835     Dev.finishAndAddTo(DeviceImagesEntries);
2836   }
2837 
2838   // Create device images global array.
2839   llvm::GlobalVariable *DeviceImages =
2840     DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
2841                                               CGM.getPointerAlign(),
2842                                               /*isConstant=*/true);
2843   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2844 
2845   // This is a Zero array to be used in the creation of the constant expressions
2846   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2847                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2848 
2849   // Create the target region descriptor.
2850   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2851       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2852   ConstantInitBuilder DescBuilder(CGM);
2853   auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
2854   DescInit.addInt(CGM.Int32Ty, Devices.size());
2855   DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
2856                                                     DeviceImages,
2857                                                     Index));
2858   DescInit.add(HostEntriesBegin);
2859   DescInit.add(HostEntriesEnd);
2860 
2861   auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
2862                                               CGM.getPointerAlign(),
2863                                               /*isConstant=*/true);
2864 
2865   // Emit code to register or unregister the descriptor at execution
2866   // startup or closing, respectively.
2867 
2868   // Create a variable to drive the registration and unregistration of the
2869   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2870   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2871   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2872                                 IdentInfo, C.CharTy);
2873 
2874   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2875       CGM, ".omp_offloading.descriptor_unreg",
2876       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2877         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2878                              Desc);
2879       });
2880   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2881       CGM, ".omp_offloading.descriptor_reg",
2882       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2883         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2884                              Desc);
2885         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2886       });
2887   return RegFn;
2888 }
2889 
2890 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2891                                          llvm::Constant *Addr, uint64_t Size) {
2892   StringRef Name = Addr->getName();
2893   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2894       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2895   llvm::LLVMContext &C = CGM.getModule().getContext();
2896   llvm::Module &M = CGM.getModule();
2897 
2898   // Make sure the address has the right type.
2899   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2900 
2901   // Create constant string with the name.
2902   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2903 
2904   llvm::GlobalVariable *Str =
2905       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2906                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2907                                ".omp_offloading.entry_name");
2908   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2909   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2910 
2911   // We can't have any padding between symbols, so we need to have 1-byte
2912   // alignment.
2913   auto Align = CharUnits::fromQuantity(1);
2914 
2915   // Create the entry struct.
2916   ConstantInitBuilder EntryBuilder(CGM);
2917   auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
2918   EntryInit.add(AddrPtr);
2919   EntryInit.add(StrPtr);
2920   EntryInit.addInt(CGM.SizeTy, Size);
2921   llvm::GlobalVariable *Entry =
2922     EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
2923                                     Align,
2924                                     /*constant*/ true,
2925                                     llvm::GlobalValue::ExternalLinkage);
2926 
2927   // The entry has to be created in the section the linker expects it to be.
2928   Entry->setSection(".omp_offloading.entries");
2929 }
2930 
2931 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2932   // Emit the offloading entries and metadata so that the device codegen side
2933   // can
2934   // easily figure out what to emit. The produced metadata looks like this:
2935   //
2936   // !omp_offload.info = !{!1, ...}
2937   //
2938   // Right now we only generate metadata for function that contain target
2939   // regions.
2940 
2941   // If we do not have entries, we dont need to do anything.
2942   if (OffloadEntriesInfoManager.empty())
2943     return;
2944 
2945   llvm::Module &M = CGM.getModule();
2946   llvm::LLVMContext &C = M.getContext();
2947   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2948       OrderedEntries(OffloadEntriesInfoManager.size());
2949 
2950   // Create the offloading info metadata node.
2951   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2952 
2953   // Auxiliar methods to create metadata values and strings.
2954   auto getMDInt = [&](unsigned v) {
2955     return llvm::ConstantAsMetadata::get(
2956         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2957   };
2958 
2959   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2960 
2961   // Create function that emits metadata for each target region entry;
2962   auto &&TargetRegionMetadataEmitter = [&](
2963       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2964       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2965     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2966     // Generate metadata for target regions. Each entry of this metadata
2967     // contains:
2968     // - Entry 0 -> Kind of this type of metadata (0).
2969     // - Entry 1 -> Device ID of the file where the entry was identified.
2970     // - Entry 2 -> File ID of the file where the entry was identified.
2971     // - Entry 3 -> Mangled name of the function where the entry was identified.
2972     // - Entry 4 -> Line in the file where the entry was identified.
2973     // - Entry 5 -> Order the entry was created.
2974     // The first element of the metadata node is the kind.
2975     Ops.push_back(getMDInt(E.getKind()));
2976     Ops.push_back(getMDInt(DeviceID));
2977     Ops.push_back(getMDInt(FileID));
2978     Ops.push_back(getMDString(ParentName));
2979     Ops.push_back(getMDInt(Line));
2980     Ops.push_back(getMDInt(E.getOrder()));
2981 
2982     // Save this entry in the right position of the ordered entries array.
2983     OrderedEntries[E.getOrder()] = &E;
2984 
2985     // Add metadata to the named metadata node.
2986     MD->addOperand(llvm::MDNode::get(C, Ops));
2987   };
2988 
2989   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2990       TargetRegionMetadataEmitter);
2991 
2992   for (auto *E : OrderedEntries) {
2993     assert(E && "All ordered entries must exist!");
2994     if (auto *CE =
2995             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2996                 E)) {
2997       assert(CE->getID() && CE->getAddress() &&
2998              "Entry ID and Addr are invalid!");
2999       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
3000     } else
3001       llvm_unreachable("Unsupported entry kind.");
3002   }
3003 }
3004 
3005 /// \brief Loads all the offload entries information from the host IR
3006 /// metadata.
3007 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3008   // If we are in target mode, load the metadata from the host IR. This code has
3009   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3010 
3011   if (!CGM.getLangOpts().OpenMPIsDevice)
3012     return;
3013 
3014   if (CGM.getLangOpts().OMPHostIRFile.empty())
3015     return;
3016 
3017   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3018   if (Buf.getError())
3019     return;
3020 
3021   llvm::LLVMContext C;
3022   auto ME = expectedToErrorOrAndEmitErrors(
3023       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3024 
3025   if (ME.getError())
3026     return;
3027 
3028   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3029   if (!MD)
3030     return;
3031 
3032   for (auto I : MD->operands()) {
3033     llvm::MDNode *MN = cast<llvm::MDNode>(I);
3034 
3035     auto getMDInt = [&](unsigned Idx) {
3036       llvm::ConstantAsMetadata *V =
3037           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3038       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3039     };
3040 
3041     auto getMDString = [&](unsigned Idx) {
3042       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3043       return V->getString();
3044     };
3045 
3046     switch (getMDInt(0)) {
3047     default:
3048       llvm_unreachable("Unexpected metadata!");
3049       break;
3050     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3051         OFFLOAD_ENTRY_INFO_TARGET_REGION:
3052       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3053           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3054           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3055           /*Order=*/getMDInt(5));
3056       break;
3057     }
3058   }
3059 }
3060 
3061 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3062   if (!KmpRoutineEntryPtrTy) {
3063     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3064     auto &C = CGM.getContext();
3065     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3066     FunctionProtoType::ExtProtoInfo EPI;
3067     KmpRoutineEntryPtrQTy = C.getPointerType(
3068         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3069     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3070   }
3071 }
3072 
3073 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3074                                        QualType FieldTy) {
3075   auto *Field = FieldDecl::Create(
3076       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3077       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3078       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3079   Field->setAccess(AS_public);
3080   DC->addDecl(Field);
3081   return Field;
3082 }
3083 
3084 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3085 
3086   // Make sure the type of the entry is already created. This is the type we
3087   // have to create:
3088   // struct __tgt_offload_entry{
3089   //   void      *addr;       // Pointer to the offload entry info.
3090   //                          // (function or global)
3091   //   char      *name;       // Name of the function or global.
3092   //   size_t     size;       // Size of the entry info (0 if it a function).
3093   // };
3094   if (TgtOffloadEntryQTy.isNull()) {
3095     ASTContext &C = CGM.getContext();
3096     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3097     RD->startDefinition();
3098     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3099     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3100     addFieldToRecordDecl(C, RD, C.getSizeType());
3101     RD->completeDefinition();
3102     TgtOffloadEntryQTy = C.getRecordType(RD);
3103   }
3104   return TgtOffloadEntryQTy;
3105 }
3106 
3107 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3108   // These are the types we need to build:
3109   // struct __tgt_device_image{
3110   // void   *ImageStart;       // Pointer to the target code start.
3111   // void   *ImageEnd;         // Pointer to the target code end.
3112   // // We also add the host entries to the device image, as it may be useful
3113   // // for the target runtime to have access to that information.
3114   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3115   //                                       // the entries.
3116   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3117   //                                       // entries (non inclusive).
3118   // };
3119   if (TgtDeviceImageQTy.isNull()) {
3120     ASTContext &C = CGM.getContext();
3121     auto *RD = C.buildImplicitRecord("__tgt_device_image");
3122     RD->startDefinition();
3123     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3124     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3125     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3126     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3127     RD->completeDefinition();
3128     TgtDeviceImageQTy = C.getRecordType(RD);
3129   }
3130   return TgtDeviceImageQTy;
3131 }
3132 
3133 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3134   // struct __tgt_bin_desc{
3135   //   int32_t              NumDevices;      // Number of devices supported.
3136   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
3137   //                                         // (one per device).
3138   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3139   //                                         // entries.
3140   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3141   //                                         // entries (non inclusive).
3142   // };
3143   if (TgtBinaryDescriptorQTy.isNull()) {
3144     ASTContext &C = CGM.getContext();
3145     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3146     RD->startDefinition();
3147     addFieldToRecordDecl(
3148         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3149     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3150     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3151     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3152     RD->completeDefinition();
3153     TgtBinaryDescriptorQTy = C.getRecordType(RD);
3154   }
3155   return TgtBinaryDescriptorQTy;
3156 }
3157 
3158 namespace {
3159 struct PrivateHelpersTy {
3160   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3161                    const VarDecl *PrivateElemInit)
3162       : Original(Original), PrivateCopy(PrivateCopy),
3163         PrivateElemInit(PrivateElemInit) {}
3164   const VarDecl *Original;
3165   const VarDecl *PrivateCopy;
3166   const VarDecl *PrivateElemInit;
3167 };
3168 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3169 } // anonymous namespace
3170 
3171 static RecordDecl *
3172 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3173   if (!Privates.empty()) {
3174     auto &C = CGM.getContext();
3175     // Build struct .kmp_privates_t. {
3176     //         /*  private vars  */
3177     //       };
3178     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3179     RD->startDefinition();
3180     for (auto &&Pair : Privates) {
3181       auto *VD = Pair.second.Original;
3182       auto Type = VD->getType();
3183       Type = Type.getNonReferenceType();
3184       auto *FD = addFieldToRecordDecl(C, RD, Type);
3185       if (VD->hasAttrs()) {
3186         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3187              E(VD->getAttrs().end());
3188              I != E; ++I)
3189           FD->addAttr(*I);
3190       }
3191     }
3192     RD->completeDefinition();
3193     return RD;
3194   }
3195   return nullptr;
3196 }
3197 
3198 static RecordDecl *
3199 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3200                          QualType KmpInt32Ty,
3201                          QualType KmpRoutineEntryPointerQTy) {
3202   auto &C = CGM.getContext();
3203   // Build struct kmp_task_t {
3204   //         void *              shareds;
3205   //         kmp_routine_entry_t routine;
3206   //         kmp_int32           part_id;
3207   //         kmp_cmplrdata_t data1;
3208   //         kmp_cmplrdata_t data2;
3209   // For taskloops additional fields:
3210   //         kmp_uint64          lb;
3211   //         kmp_uint64          ub;
3212   //         kmp_int64           st;
3213   //         kmp_int32           liter;
3214   //       };
3215   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3216   UD->startDefinition();
3217   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3218   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3219   UD->completeDefinition();
3220   QualType KmpCmplrdataTy = C.getRecordType(UD);
3221   auto *RD = C.buildImplicitRecord("kmp_task_t");
3222   RD->startDefinition();
3223   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3224   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3225   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3226   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3227   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3228   if (isOpenMPTaskLoopDirective(Kind)) {
3229     QualType KmpUInt64Ty =
3230         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3231     QualType KmpInt64Ty =
3232         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3233     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3234     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3235     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3236     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3237   }
3238   RD->completeDefinition();
3239   return RD;
3240 }
3241 
3242 static RecordDecl *
3243 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3244                                      ArrayRef<PrivateDataTy> Privates) {
3245   auto &C = CGM.getContext();
3246   // Build struct kmp_task_t_with_privates {
3247   //         kmp_task_t task_data;
3248   //         .kmp_privates_t. privates;
3249   //       };
3250   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3251   RD->startDefinition();
3252   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3253   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3254     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3255   }
3256   RD->completeDefinition();
3257   return RD;
3258 }
3259 
3260 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3261 /// argument.
3262 /// \code
3263 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3264 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3265 ///   For taskloops:
3266 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3267 ///   tt->shareds);
3268 ///   return 0;
3269 /// }
3270 /// \endcode
3271 static llvm::Value *
3272 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3273                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3274                       QualType KmpTaskTWithPrivatesPtrQTy,
3275                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3276                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
3277                       llvm::Value *TaskPrivatesMap) {
3278   auto &C = CGM.getContext();
3279   FunctionArgList Args;
3280   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3281   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3282                                 /*Id=*/nullptr,
3283                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3284   Args.push_back(&GtidArg);
3285   Args.push_back(&TaskTypeArg);
3286   auto &TaskEntryFnInfo =
3287       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3288   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3289   auto *TaskEntry =
3290       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
3291                              ".omp_task_entry.", &CGM.getModule());
3292   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3293   CodeGenFunction CGF(CGM);
3294   CGF.disableDebugInfo();
3295   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3296 
3297   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3298   // tt,
3299   // For taskloops:
3300   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3301   // tt->task_data.shareds);
3302   auto *GtidParam = CGF.EmitLoadOfScalar(
3303       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3304   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3305       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3306       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3307   auto *KmpTaskTWithPrivatesQTyRD =
3308       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3309   LValue Base =
3310       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3311   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3312   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3313   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3314   auto *PartidParam = PartIdLVal.getPointer();
3315 
3316   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3317   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3318   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3319       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3320       CGF.ConvertTypeForMem(SharedsPtrTy));
3321 
3322   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3323   llvm::Value *PrivatesParam;
3324   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3325     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3326     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3327         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3328   } else
3329     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3330 
3331   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3332                                TaskPrivatesMap,
3333                                CGF.Builder
3334                                    .CreatePointerBitCastOrAddrSpaceCast(
3335                                        TDBase.getAddress(), CGF.VoidPtrTy)
3336                                    .getPointer()};
3337   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3338                                           std::end(CommonArgs));
3339   if (isOpenMPTaskLoopDirective(Kind)) {
3340     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3341     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3342     auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3343     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3344     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3345     auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3346     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3347     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3348     auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3349     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3350     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3351     auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3352     CallArgs.push_back(LBParam);
3353     CallArgs.push_back(UBParam);
3354     CallArgs.push_back(StParam);
3355     CallArgs.push_back(LIParam);
3356   }
3357   CallArgs.push_back(SharedsParam);
3358 
3359   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3360   CGF.EmitStoreThroughLValue(
3361       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3362       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3363   CGF.FinishFunction();
3364   return TaskEntry;
3365 }
3366 
3367 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3368                                             SourceLocation Loc,
3369                                             QualType KmpInt32Ty,
3370                                             QualType KmpTaskTWithPrivatesPtrQTy,
3371                                             QualType KmpTaskTWithPrivatesQTy) {
3372   auto &C = CGM.getContext();
3373   FunctionArgList Args;
3374   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3375   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3376                                 /*Id=*/nullptr,
3377                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3378   Args.push_back(&GtidArg);
3379   Args.push_back(&TaskTypeArg);
3380   FunctionType::ExtInfo Info;
3381   auto &DestructorFnInfo =
3382       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3383   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3384   auto *DestructorFn =
3385       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3386                              ".omp_task_destructor.", &CGM.getModule());
3387   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3388                                     DestructorFnInfo);
3389   CodeGenFunction CGF(CGM);
3390   CGF.disableDebugInfo();
3391   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3392                     Args);
3393 
3394   LValue Base = CGF.EmitLoadOfPointerLValue(
3395       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3396       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3397   auto *KmpTaskTWithPrivatesQTyRD =
3398       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3399   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3400   Base = CGF.EmitLValueForField(Base, *FI);
3401   for (auto *Field :
3402        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3403     if (auto DtorKind = Field->getType().isDestructedType()) {
3404       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3405       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3406     }
3407   }
3408   CGF.FinishFunction();
3409   return DestructorFn;
3410 }
3411 
3412 /// \brief Emit a privates mapping function for correct handling of private and
3413 /// firstprivate variables.
3414 /// \code
3415 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3416 /// **noalias priv1,...,  <tyn> **noalias privn) {
3417 ///   *priv1 = &.privates.priv1;
3418 ///   ...;
3419 ///   *privn = &.privates.privn;
3420 /// }
3421 /// \endcode
3422 static llvm::Value *
3423 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3424                                ArrayRef<const Expr *> PrivateVars,
3425                                ArrayRef<const Expr *> FirstprivateVars,
3426                                ArrayRef<const Expr *> LastprivateVars,
3427                                QualType PrivatesQTy,
3428                                ArrayRef<PrivateDataTy> Privates) {
3429   auto &C = CGM.getContext();
3430   FunctionArgList Args;
3431   ImplicitParamDecl TaskPrivatesArg(
3432       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3433       C.getPointerType(PrivatesQTy).withConst().withRestrict());
3434   Args.push_back(&TaskPrivatesArg);
3435   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3436   unsigned Counter = 1;
3437   for (auto *E: PrivateVars) {
3438     Args.push_back(ImplicitParamDecl::Create(
3439         C, /*DC=*/nullptr, Loc,
3440         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3441                             .withConst()
3442                             .withRestrict()));
3443     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3444     PrivateVarsPos[VD] = Counter;
3445     ++Counter;
3446   }
3447   for (auto *E : FirstprivateVars) {
3448     Args.push_back(ImplicitParamDecl::Create(
3449         C, /*DC=*/nullptr, Loc,
3450         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3451                             .withConst()
3452                             .withRestrict()));
3453     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3454     PrivateVarsPos[VD] = Counter;
3455     ++Counter;
3456   }
3457   for (auto *E: LastprivateVars) {
3458     Args.push_back(ImplicitParamDecl::Create(
3459         C, /*DC=*/nullptr, Loc,
3460         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3461                             .withConst()
3462                             .withRestrict()));
3463     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3464     PrivateVarsPos[VD] = Counter;
3465     ++Counter;
3466   }
3467   auto &TaskPrivatesMapFnInfo =
3468       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3469   auto *TaskPrivatesMapTy =
3470       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3471   auto *TaskPrivatesMap = llvm::Function::Create(
3472       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3473       ".omp_task_privates_map.", &CGM.getModule());
3474   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3475                                     TaskPrivatesMapFnInfo);
3476   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3477   CodeGenFunction CGF(CGM);
3478   CGF.disableDebugInfo();
3479   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3480                     TaskPrivatesMapFnInfo, Args);
3481 
3482   // *privi = &.privates.privi;
3483   LValue Base = CGF.EmitLoadOfPointerLValue(
3484       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3485       TaskPrivatesArg.getType()->castAs<PointerType>());
3486   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3487   Counter = 0;
3488   for (auto *Field : PrivatesQTyRD->fields()) {
3489     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3490     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3491     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3492     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3493         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3494     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3495     ++Counter;
3496   }
3497   CGF.FinishFunction();
3498   return TaskPrivatesMap;
3499 }
3500 
3501 static int array_pod_sort_comparator(const PrivateDataTy *P1,
3502                                      const PrivateDataTy *P2) {
3503   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3504 }
3505 
3506 /// Emit initialization for private variables in task-based directives.
3507 static void emitPrivatesInit(CodeGenFunction &CGF,
3508                              const OMPExecutableDirective &D,
3509                              Address KmpTaskSharedsPtr, LValue TDBase,
3510                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3511                              QualType SharedsTy, QualType SharedsPtrTy,
3512                              const OMPTaskDataTy &Data,
3513                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3514   auto &C = CGF.getContext();
3515   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3516   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3517   LValue SrcBase;
3518   if (!Data.FirstprivateVars.empty()) {
3519     SrcBase = CGF.MakeAddrLValue(
3520         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3521             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3522         SharedsTy);
3523   }
3524   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
3525       cast<CapturedStmt>(*D.getAssociatedStmt()));
3526   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3527   for (auto &&Pair : Privates) {
3528     auto *VD = Pair.second.PrivateCopy;
3529     auto *Init = VD->getAnyInitializer();
3530     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3531                              !CGF.isTrivialInitializer(Init)))) {
3532       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3533       if (auto *Elem = Pair.second.PrivateElemInit) {
3534         auto *OriginalVD = Pair.second.Original;
3535         auto *SharedField = CapturesInfo.lookup(OriginalVD);
3536         auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3537         SharedRefLValue = CGF.MakeAddrLValue(
3538             Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
3539             SharedRefLValue.getType(), AlignmentSource::Decl);
3540         QualType Type = OriginalVD->getType();
3541         if (Type->isArrayType()) {
3542           // Initialize firstprivate array.
3543           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3544             // Perform simple memcpy.
3545             CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
3546                                     SharedRefLValue.getAddress(), Type);
3547           } else {
3548             // Initialize firstprivate array using element-by-element
3549             // intialization.
3550             CGF.EmitOMPAggregateAssign(
3551                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3552                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3553                                                   Address SrcElement) {
3554                   // Clean up any temporaries needed by the initialization.
3555                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3556                   InitScope.addPrivate(
3557                       Elem, [SrcElement]() -> Address { return SrcElement; });
3558                   (void)InitScope.Privatize();
3559                   // Emit initialization for single element.
3560                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3561                       CGF, &CapturesInfo);
3562                   CGF.EmitAnyExprToMem(Init, DestElement,
3563                                        Init->getType().getQualifiers(),
3564                                        /*IsInitializer=*/false);
3565                 });
3566           }
3567         } else {
3568           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3569           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
3570             return SharedRefLValue.getAddress();
3571           });
3572           (void)InitScope.Privatize();
3573           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3574           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3575                              /*capturedByInit=*/false);
3576         }
3577       } else
3578         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3579     }
3580     ++FI;
3581   }
3582 }
3583 
3584 /// Check if duplication function is required for taskloops.
3585 static bool checkInitIsRequired(CodeGenFunction &CGF,
3586                                 ArrayRef<PrivateDataTy> Privates) {
3587   bool InitRequired = false;
3588   for (auto &&Pair : Privates) {
3589     auto *VD = Pair.second.PrivateCopy;
3590     auto *Init = VD->getAnyInitializer();
3591     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3592                                     !CGF.isTrivialInitializer(Init));
3593   }
3594   return InitRequired;
3595 }
3596 
3597 
3598 /// Emit task_dup function (for initialization of
3599 /// private/firstprivate/lastprivate vars and last_iter flag)
3600 /// \code
3601 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3602 /// lastpriv) {
3603 /// // setup lastprivate flag
3604 ///    task_dst->last = lastpriv;
3605 /// // could be constructor calls here...
3606 /// }
3607 /// \endcode
3608 static llvm::Value *
3609 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3610                     const OMPExecutableDirective &D,
3611                     QualType KmpTaskTWithPrivatesPtrQTy,
3612                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3613                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3614                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3615                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3616   auto &C = CGM.getContext();
3617   FunctionArgList Args;
3618   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
3619                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3620   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
3621                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3622   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
3623                                 /*Id=*/nullptr, C.IntTy);
3624   Args.push_back(&DstArg);
3625   Args.push_back(&SrcArg);
3626   Args.push_back(&LastprivArg);
3627   auto &TaskDupFnInfo =
3628       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3629   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3630   auto *TaskDup =
3631       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
3632                              ".omp_task_dup.", &CGM.getModule());
3633   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
3634   CodeGenFunction CGF(CGM);
3635   CGF.disableDebugInfo();
3636   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
3637 
3638   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3639       CGF.GetAddrOfLocalVar(&DstArg),
3640       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3641   // task_dst->liter = lastpriv;
3642   if (WithLastIter) {
3643     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3644     LValue Base = CGF.EmitLValueForField(
3645         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3646     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3647     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3648         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3649     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3650   }
3651 
3652   // Emit initial values for private copies (if any).
3653   assert(!Privates.empty());
3654   Address KmpTaskSharedsPtr = Address::invalid();
3655   if (!Data.FirstprivateVars.empty()) {
3656     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3657         CGF.GetAddrOfLocalVar(&SrcArg),
3658         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3659     LValue Base = CGF.EmitLValueForField(
3660         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3661     KmpTaskSharedsPtr = Address(
3662         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3663                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3664                                                   KmpTaskTShareds)),
3665                              Loc),
3666         CGF.getNaturalTypeAlignment(SharedsTy));
3667   }
3668   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3669                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3670   CGF.FinishFunction();
3671   return TaskDup;
3672 }
3673 
3674 /// Checks if destructor function is required to be generated.
3675 /// \return true if cleanups are required, false otherwise.
3676 static bool
3677 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3678   bool NeedsCleanup = false;
3679   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3680   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3681   for (auto *FD : PrivateRD->fields()) {
3682     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3683     if (NeedsCleanup)
3684       break;
3685   }
3686   return NeedsCleanup;
3687 }
3688 
3689 CGOpenMPRuntime::TaskResultTy
3690 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3691                               const OMPExecutableDirective &D,
3692                               llvm::Value *TaskFunction, QualType SharedsTy,
3693                               Address Shareds, const OMPTaskDataTy &Data) {
3694   auto &C = CGM.getContext();
3695   llvm::SmallVector<PrivateDataTy, 4> Privates;
3696   // Aggregate privates and sort them by the alignment.
3697   auto I = Data.PrivateCopies.begin();
3698   for (auto *E : Data.PrivateVars) {
3699     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3700     Privates.push_back(std::make_pair(
3701         C.getDeclAlign(VD),
3702         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3703                          /*PrivateElemInit=*/nullptr)));
3704     ++I;
3705   }
3706   I = Data.FirstprivateCopies.begin();
3707   auto IElemInitRef = Data.FirstprivateInits.begin();
3708   for (auto *E : Data.FirstprivateVars) {
3709     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3710     Privates.push_back(std::make_pair(
3711         C.getDeclAlign(VD),
3712         PrivateHelpersTy(
3713             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3714             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
3715     ++I;
3716     ++IElemInitRef;
3717   }
3718   I = Data.LastprivateCopies.begin();
3719   for (auto *E : Data.LastprivateVars) {
3720     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3721     Privates.push_back(std::make_pair(
3722         C.getDeclAlign(VD),
3723         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3724                          /*PrivateElemInit=*/nullptr)));
3725     ++I;
3726   }
3727   llvm::array_pod_sort(Privates.begin(), Privates.end(),
3728                        array_pod_sort_comparator);
3729   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3730   // Build type kmp_routine_entry_t (if not built yet).
3731   emitKmpRoutineEntryT(KmpInt32Ty);
3732   // Build type kmp_task_t (if not built yet).
3733   if (KmpTaskTQTy.isNull()) {
3734     KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3735         CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3736   }
3737   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3738   // Build particular struct kmp_task_t for the given task.
3739   auto *KmpTaskTWithPrivatesQTyRD =
3740       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3741   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3742   QualType KmpTaskTWithPrivatesPtrQTy =
3743       C.getPointerType(KmpTaskTWithPrivatesQTy);
3744   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3745   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
3746   auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3747   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3748 
3749   // Emit initial values for private copies (if any).
3750   llvm::Value *TaskPrivatesMap = nullptr;
3751   auto *TaskPrivatesMapTy =
3752       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
3753                 3)
3754           ->getType();
3755   if (!Privates.empty()) {
3756     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3757     TaskPrivatesMap = emitTaskPrivateMappingFunction(
3758         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
3759         FI->getType(), Privates);
3760     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3761         TaskPrivatesMap, TaskPrivatesMapTy);
3762   } else {
3763     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3764         cast<llvm::PointerType>(TaskPrivatesMapTy));
3765   }
3766   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3767   // kmp_task_t *tt);
3768   auto *TaskEntry = emitProxyTaskFunction(
3769       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3770       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3771       TaskPrivatesMap);
3772 
3773   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3774   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3775   // kmp_routine_entry_t *task_entry);
3776   // Task flags. Format is taken from
3777   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
3778   // description of kmp_tasking_flags struct.
3779   enum {
3780     TiedFlag = 0x1,
3781     FinalFlag = 0x2,
3782     DestructorsFlag = 0x8,
3783     PriorityFlag = 0x20
3784   };
3785   unsigned Flags = Data.Tied ? TiedFlag : 0;
3786   bool NeedsCleanup = false;
3787   if (!Privates.empty()) {
3788     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
3789     if (NeedsCleanup)
3790       Flags = Flags | DestructorsFlag;
3791   }
3792   if (Data.Priority.getInt())
3793     Flags = Flags | PriorityFlag;
3794   auto *TaskFlags =
3795       Data.Final.getPointer()
3796           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3797                                      CGF.Builder.getInt32(FinalFlag),
3798                                      CGF.Builder.getInt32(/*C=*/0))
3799           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3800   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3801   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3802   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
3803                               getThreadID(CGF, Loc), TaskFlags,
3804                               KmpTaskTWithPrivatesTySize, SharedsSize,
3805                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3806                                   TaskEntry, KmpRoutineEntryPtrTy)};
3807   auto *NewTask = CGF.EmitRuntimeCall(
3808       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
3809   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3810       NewTask, KmpTaskTWithPrivatesPtrTy);
3811   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3812                                                KmpTaskTWithPrivatesQTy);
3813   LValue TDBase =
3814       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3815   // Fill the data in the resulting kmp_task_t record.
3816   // Copy shareds if there are any.
3817   Address KmpTaskSharedsPtr = Address::invalid();
3818   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3819     KmpTaskSharedsPtr =
3820         Address(CGF.EmitLoadOfScalar(
3821                     CGF.EmitLValueForField(
3822                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
3823                                            KmpTaskTShareds)),
3824                     Loc),
3825                 CGF.getNaturalTypeAlignment(SharedsTy));
3826     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
3827   }
3828   // Emit initial values for private copies (if any).
3829   TaskResultTy Result;
3830   if (!Privates.empty()) {
3831     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3832                      SharedsTy, SharedsPtrTy, Data, Privates,
3833                      /*ForDup=*/false);
3834     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3835         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3836       Result.TaskDupFn = emitTaskDupFunction(
3837           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3838           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3839           /*WithLastIter=*/!Data.LastprivateVars.empty());
3840     }
3841   }
3842   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3843   enum { Priority = 0, Destructors = 1 };
3844   // Provide pointer to function with destructors for privates.
3845   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3846   auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
3847   if (NeedsCleanup) {
3848     llvm::Value *DestructorFn = emitDestructorsFunction(
3849         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3850         KmpTaskTWithPrivatesQTy);
3851     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3852     LValue DestructorsLV = CGF.EmitLValueForField(
3853         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3854     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3855                               DestructorFn, KmpRoutineEntryPtrTy),
3856                           DestructorsLV);
3857   }
3858   // Set priority.
3859   if (Data.Priority.getInt()) {
3860     LValue Data2LV = CGF.EmitLValueForField(
3861         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3862     LValue PriorityLV = CGF.EmitLValueForField(
3863         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3864     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3865   }
3866   Result.NewTask = NewTask;
3867   Result.TaskEntry = TaskEntry;
3868   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3869   Result.TDBase = TDBase;
3870   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3871   return Result;
3872 }
3873 
3874 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
3875                                    const OMPExecutableDirective &D,
3876                                    llvm::Value *TaskFunction,
3877                                    QualType SharedsTy, Address Shareds,
3878                                    const Expr *IfCond,
3879                                    const OMPTaskDataTy &Data) {
3880   if (!CGF.HaveInsertPoint())
3881     return;
3882 
3883   TaskResultTy Result =
3884       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
3885   llvm::Value *NewTask = Result.NewTask;
3886   llvm::Value *TaskEntry = Result.TaskEntry;
3887   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
3888   LValue TDBase = Result.TDBase;
3889   RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
3890   auto &C = CGM.getContext();
3891   // Process list of dependences.
3892   Address DependenciesArray = Address::invalid();
3893   unsigned NumDependencies = Data.Dependences.size();
3894   if (NumDependencies) {
3895     // Dependence kind for RTL.
3896     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
3897     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
3898     RecordDecl *KmpDependInfoRD;
3899     QualType FlagsTy =
3900         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
3901     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
3902     if (KmpDependInfoTy.isNull()) {
3903       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
3904       KmpDependInfoRD->startDefinition();
3905       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
3906       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
3907       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
3908       KmpDependInfoRD->completeDefinition();
3909       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
3910     } else
3911       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
3912     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
3913     // Define type kmp_depend_info[<Dependences.size()>];
3914     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
3915         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
3916         ArrayType::Normal, /*IndexTypeQuals=*/0);
3917     // kmp_depend_info[<Dependences.size()>] deps;
3918     DependenciesArray =
3919         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
3920     for (unsigned i = 0; i < NumDependencies; ++i) {
3921       const Expr *E = Data.Dependences[i].second;
3922       auto Addr = CGF.EmitLValue(E);
3923       llvm::Value *Size;
3924       QualType Ty = E->getType();
3925       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3926         LValue UpAddrLVal =
3927             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3928         llvm::Value *UpAddr =
3929             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3930         llvm::Value *LowIntPtr =
3931             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3932         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3933         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3934       } else
3935         Size = CGF.getTypeSize(Ty);
3936       auto Base = CGF.MakeAddrLValue(
3937           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3938           KmpDependInfoTy);
3939       // deps[i].base_addr = &<Dependences[i].second>;
3940       auto BaseAddrLVal = CGF.EmitLValueForField(
3941           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3942       CGF.EmitStoreOfScalar(
3943           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3944           BaseAddrLVal);
3945       // deps[i].len = sizeof(<Dependences[i].second>);
3946       auto LenLVal = CGF.EmitLValueForField(
3947           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3948       CGF.EmitStoreOfScalar(Size, LenLVal);
3949       // deps[i].flags = <Dependences[i].first>;
3950       RTLDependenceKindTy DepKind;
3951       switch (Data.Dependences[i].first) {
3952       case OMPC_DEPEND_in:
3953         DepKind = DepIn;
3954         break;
3955       // Out and InOut dependencies must use the same code.
3956       case OMPC_DEPEND_out:
3957       case OMPC_DEPEND_inout:
3958         DepKind = DepInOut;
3959         break;
3960       case OMPC_DEPEND_source:
3961       case OMPC_DEPEND_sink:
3962       case OMPC_DEPEND_unknown:
3963         llvm_unreachable("Unknown task dependence type");
3964       }
3965       auto FlagsLVal = CGF.EmitLValueForField(
3966           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3967       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3968                             FlagsLVal);
3969     }
3970     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3971         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3972         CGF.VoidPtrTy);
3973   }
3974 
3975   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
3976   // libcall.
3977   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3978   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3979   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3980   // list is not empty
3981   auto *ThreadID = getThreadID(CGF, Loc);
3982   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3983   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3984   llvm::Value *DepTaskArgs[7];
3985   if (NumDependencies) {
3986     DepTaskArgs[0] = UpLoc;
3987     DepTaskArgs[1] = ThreadID;
3988     DepTaskArgs[2] = NewTask;
3989     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3990     DepTaskArgs[4] = DependenciesArray.getPointer();
3991     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3992     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3993   }
3994   auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
3995                         NumDependencies, &TaskArgs,
3996                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
3997     if (!Data.Tied) {
3998       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3999       auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4000       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4001     }
4002     if (NumDependencies) {
4003       CGF.EmitRuntimeCall(
4004           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
4005     } else {
4006       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4007                           TaskArgs);
4008     }
4009     // Check if parent region is untied and build return for untied task;
4010     if (auto *Region =
4011             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4012       Region->emitUntiedSwitch(CGF);
4013   };
4014 
4015   llvm::Value *DepWaitTaskArgs[6];
4016   if (NumDependencies) {
4017     DepWaitTaskArgs[0] = UpLoc;
4018     DepWaitTaskArgs[1] = ThreadID;
4019     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4020     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4021     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4022     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4023   }
4024   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4025                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
4026                                                            PrePostActionTy &) {
4027     auto &RT = CGF.CGM.getOpenMPRuntime();
4028     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4029     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4030     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4031     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4032     // is specified.
4033     if (NumDependencies)
4034       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4035                           DepWaitTaskArgs);
4036     // Call proxy_task_entry(gtid, new_task);
4037     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
4038         CodeGenFunction &CGF, PrePostActionTy &Action) {
4039       Action.Enter(CGF);
4040       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4041       CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
4042     };
4043 
4044     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4045     // kmp_task_t *new_task);
4046     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4047     // kmp_task_t *new_task);
4048     RegionCodeGenTy RCG(CodeGen);
4049     CommonActionTy Action(
4050         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4051         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4052     RCG.setAction(Action);
4053     RCG(CGF);
4054   };
4055 
4056   if (IfCond)
4057     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4058   else {
4059     RegionCodeGenTy ThenRCG(ThenCodeGen);
4060     ThenRCG(CGF);
4061   }
4062 }
4063 
4064 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4065                                        const OMPLoopDirective &D,
4066                                        llvm::Value *TaskFunction,
4067                                        QualType SharedsTy, Address Shareds,
4068                                        const Expr *IfCond,
4069                                        const OMPTaskDataTy &Data) {
4070   if (!CGF.HaveInsertPoint())
4071     return;
4072   TaskResultTy Result =
4073       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4074   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
4075   // libcall.
4076   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4077   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4078   // sched, kmp_uint64 grainsize, void *task_dup);
4079   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4080   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4081   llvm::Value *IfVal;
4082   if (IfCond) {
4083     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4084                                       /*isSigned=*/true);
4085   } else
4086     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4087 
4088   LValue LBLVal = CGF.EmitLValueForField(
4089       Result.TDBase,
4090       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4091   auto *LBVar =
4092       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4093   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4094                        /*IsInitializer=*/true);
4095   LValue UBLVal = CGF.EmitLValueForField(
4096       Result.TDBase,
4097       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4098   auto *UBVar =
4099       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4100   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4101                        /*IsInitializer=*/true);
4102   LValue StLVal = CGF.EmitLValueForField(
4103       Result.TDBase,
4104       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4105   auto *StVar =
4106       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4107   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4108                        /*IsInitializer=*/true);
4109   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4110   llvm::Value *TaskArgs[] = {
4111       UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
4112       UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4113       llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
4114       llvm::ConstantInt::getSigned(
4115           CGF.IntTy, Data.Schedule.getPointer()
4116                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4117                          : NoSchedule),
4118       Data.Schedule.getPointer()
4119           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4120                                       /*isSigned=*/false)
4121           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4122       Result.TaskDupFn
4123           ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
4124                                                             CGF.VoidPtrTy)
4125           : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4126   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4127 }
4128 
4129 /// \brief Emit reduction operation for each element of array (required for
4130 /// array sections) LHS op = RHS.
4131 /// \param Type Type of array.
4132 /// \param LHSVar Variable on the left side of the reduction operation
4133 /// (references element of array in original variable).
4134 /// \param RHSVar Variable on the right side of the reduction operation
4135 /// (references element of array in original variable).
4136 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4137 /// RHSVar.
4138 static void EmitOMPAggregateReduction(
4139     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4140     const VarDecl *RHSVar,
4141     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4142                                   const Expr *, const Expr *)> &RedOpGen,
4143     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4144     const Expr *UpExpr = nullptr) {
4145   // Perform element-by-element initialization.
4146   QualType ElementTy;
4147   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4148   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4149 
4150   // Drill down to the base element type on both arrays.
4151   auto ArrayTy = Type->getAsArrayTypeUnsafe();
4152   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4153 
4154   auto RHSBegin = RHSAddr.getPointer();
4155   auto LHSBegin = LHSAddr.getPointer();
4156   // Cast from pointer to array type to pointer to single element.
4157   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4158   // The basic structure here is a while-do loop.
4159   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4160   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4161   auto IsEmpty =
4162       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4163   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4164 
4165   // Enter the loop body, making that address the current address.
4166   auto EntryBB = CGF.Builder.GetInsertBlock();
4167   CGF.EmitBlock(BodyBB);
4168 
4169   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4170 
4171   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4172       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4173   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4174   Address RHSElementCurrent =
4175       Address(RHSElementPHI,
4176               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4177 
4178   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4179       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4180   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4181   Address LHSElementCurrent =
4182       Address(LHSElementPHI,
4183               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4184 
4185   // Emit copy.
4186   CodeGenFunction::OMPPrivateScope Scope(CGF);
4187   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4188   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4189   Scope.Privatize();
4190   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4191   Scope.ForceCleanup();
4192 
4193   // Shift the address forward by one element.
4194   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4195       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4196   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4197       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4198   // Check whether we've reached the end.
4199   auto Done =
4200       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4201   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4202   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4203   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4204 
4205   // Done.
4206   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4207 }
4208 
4209 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4210 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4211 /// UDR combiner function.
4212 static void emitReductionCombiner(CodeGenFunction &CGF,
4213                                   const Expr *ReductionOp) {
4214   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4215     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4216       if (auto *DRE =
4217               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4218         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4219           std::pair<llvm::Function *, llvm::Function *> Reduction =
4220               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4221           RValue Func = RValue::get(Reduction.first);
4222           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4223           CGF.EmitIgnoredExpr(ReductionOp);
4224           return;
4225         }
4226   CGF.EmitIgnoredExpr(ReductionOp);
4227 }
4228 
4229 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
4230                                           llvm::Type *ArgsType,
4231                                           ArrayRef<const Expr *> Privates,
4232                                           ArrayRef<const Expr *> LHSExprs,
4233                                           ArrayRef<const Expr *> RHSExprs,
4234                                           ArrayRef<const Expr *> ReductionOps) {
4235   auto &C = CGM.getContext();
4236 
4237   // void reduction_func(void *LHSArg, void *RHSArg);
4238   FunctionArgList Args;
4239   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4240                            C.VoidPtrTy);
4241   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4242                            C.VoidPtrTy);
4243   Args.push_back(&LHSArg);
4244   Args.push_back(&RHSArg);
4245   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4246   auto *Fn = llvm::Function::Create(
4247       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4248       ".omp.reduction.reduction_func", &CGM.getModule());
4249   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4250   CodeGenFunction CGF(CGM);
4251   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4252 
4253   // Dst = (void*[n])(LHSArg);
4254   // Src = (void*[n])(RHSArg);
4255   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4256       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4257       ArgsType), CGF.getPointerAlign());
4258   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4259       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4260       ArgsType), CGF.getPointerAlign());
4261 
4262   //  ...
4263   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4264   //  ...
4265   CodeGenFunction::OMPPrivateScope Scope(CGF);
4266   auto IPriv = Privates.begin();
4267   unsigned Idx = 0;
4268   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4269     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4270     Scope.addPrivate(RHSVar, [&]() -> Address {
4271       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4272     });
4273     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4274     Scope.addPrivate(LHSVar, [&]() -> Address {
4275       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4276     });
4277     QualType PrivTy = (*IPriv)->getType();
4278     if (PrivTy->isVariablyModifiedType()) {
4279       // Get array size and emit VLA type.
4280       ++Idx;
4281       Address Elem =
4282           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4283       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4284       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4285       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4286       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4287           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4288       CGF.EmitVariablyModifiedType(PrivTy);
4289     }
4290   }
4291   Scope.Privatize();
4292   IPriv = Privates.begin();
4293   auto ILHS = LHSExprs.begin();
4294   auto IRHS = RHSExprs.begin();
4295   for (auto *E : ReductionOps) {
4296     if ((*IPriv)->getType()->isArrayType()) {
4297       // Emit reduction for array section.
4298       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4299       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4300       EmitOMPAggregateReduction(
4301           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4302           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4303             emitReductionCombiner(CGF, E);
4304           });
4305     } else
4306       // Emit reduction for array subscript or single variable.
4307       emitReductionCombiner(CGF, E);
4308     ++IPriv;
4309     ++ILHS;
4310     ++IRHS;
4311   }
4312   Scope.ForceCleanup();
4313   CGF.FinishFunction();
4314   return Fn;
4315 }
4316 
4317 static void emitSingleReductionCombiner(CodeGenFunction &CGF,
4318                                         const Expr *ReductionOp,
4319                                         const Expr *PrivateRef,
4320                                         const DeclRefExpr *LHS,
4321                                         const DeclRefExpr *RHS) {
4322   if (PrivateRef->getType()->isArrayType()) {
4323     // Emit reduction for array section.
4324     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4325     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4326     EmitOMPAggregateReduction(
4327         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4328         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4329           emitReductionCombiner(CGF, ReductionOp);
4330         });
4331   } else
4332     // Emit reduction for array subscript or single variable.
4333     emitReductionCombiner(CGF, ReductionOp);
4334 }
4335 
/// Emit the reduction operations \a ReductionOps, either directly
/// (\a SimpleReduction) or dispatched on the result of a
/// __kmpc_reduce{_nowait} runtime call.
/// \param Privates Expressions whose types decide, per item, whether the
/// operation is emitted element-wise (array type) and whether an extra
/// array-size slot is reserved in the reduction list (variably modified type).
/// \param LHSExprs References (DeclRefExprs) to the left-hand helper
/// variables of the operations.
/// \param RHSExprs References (DeclRefExprs) to the right-hand helper
/// variables; their addresses populate the reduction list.
/// \param ReductionOps The reduction operations, one per item.
/// \param WithNowait Selects the _nowait flavour of the runtime entry points.
/// \param SimpleReduction If true, only the plain reduction operations are
/// emitted, with no runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  if (!CGF.HaveInsertPoint())
    return;
  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  if (SimpleReduction) {
    // Emit every combiner in place inside a cleanups scope; no reduction
    // list, helper function or runtime call is generated on this path.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list holds one slot per RHS expression plus one additional slot for
  // every private of variably modified type.
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances faster than I when size slots are
  // written.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // RedList[Idx] = (void *)&<RHSExprs>[I];
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The size is stored in the following slot, converted to a pointer.
      // NOTE(review): the local 'Size' below shadows the slot count declared
      // above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .first,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  // It receives lists typed as pointer-to-ReductionArrayTy.
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of RHS expressions, while sizeof(RedList)
  // also accounts for the extra array-size slots.
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL =
    CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
                                                    CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Both cases branch to DefaultBB when they are done.
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the plain (non-atomic) combiners.
  // NOTE(review): the 'Action' parameter is not referenced in this lambda.
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action is given no enter callee (nullptr / llvm::None); only the
  // __kmpc_end_reduce{_nowait} call is supplied as the second callee.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Body of case 2. Each operation of the form 'X = UpExpr' is emitted through
  // EmitOMPAtomicSimpleUpdateExpr; any other operation is wrapped in the
  // critical region ".atomic_reduction".
  // NOTE(review): the 'Action' parameter is not referenced in this lambda.
  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      // XExpr: assignment target; UpExpr: assigned value; EExpr: right operand
      // of the binary operator found in UpExpr (or in the condition of a
      // conditional UpExpr); BO: that operator's opcode, BO_Comma if none.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE(review): the pointer 'BO' declared in the condition shadows the
      // opcode 'BO' above for the extent of this if statement.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Generator for one atomic update of X.
        // NOTE(review): 'IPriv' is captured by this lambda and by the nested
        // one below, but neither body references it.
        auto &&AtomicRedGen = [BO, VD, IPriv,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              // Callback computing the updated value from a given value of X:
              // the value is stored into a temporary that stands in for the
              // LHS variable VD, and UpExpr is evaluated against it.
              [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        // The operation is not an assignment, so the combiner is emitted
        // inside the critical region named ".atomic_reduction".
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the blocking flavour attaches __kmpc_end_reduce to case 2; with
  // nowait the atomic body is emitted without any action.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
4628 
4629 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4630                                        SourceLocation Loc) {
4631   if (!CGF.HaveInsertPoint())
4632     return;
4633   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4634   // global_tid);
4635   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4636   // Ignore return result until untied tasks are supported.
4637   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4638   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4639     Region->emitUntiedSwitch(CGF);
4640 }
4641 
4642 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4643                                            OpenMPDirectiveKind InnerKind,
4644                                            const RegionCodeGenTy &CodeGen,
4645                                            bool HasCancel) {
4646   if (!CGF.HaveInsertPoint())
4647     return;
4648   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4649   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4650 }
4651 
namespace {
/// Cancellation kind codes. getCancellationKind() maps an OpenMP directive
/// kind onto one of these, and callers emit the numeric value as the i32
/// cancellation-kind argument of the cancellation runtime calls, so the
/// values must not be renumbered.
enum RTCancelKind {
  /// Never returned by getCancellationKind(); presumably "no cancellation
  /// request" on the runtime side -- TODO confirm against the runtime headers.
  CancelNoreq = 0,
  /// Returned for OMPD_parallel.
  CancelParallel = 1,
  /// Returned for OMPD_for.
  CancelLoop = 2,
  /// Returned for OMPD_sections.
  CancelSections = 3,
  /// Returned for every remaining kind, which is asserted to be
  /// OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
4661 
4662 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4663   RTCancelKind CancelKind = CancelNoreq;
4664   if (CancelRegion == OMPD_parallel)
4665     CancelKind = CancelParallel;
4666   else if (CancelRegion == OMPD_for)
4667     CancelKind = CancelLoop;
4668   else if (CancelRegion == OMPD_sections)
4669     CancelKind = CancelSections;
4670   else {
4671     assert(CancelRegion == OMPD_taskgroup);
4672     CancelKind = CancelTaskgroup;
4673   }
4674   return CancelKind;
4675 }
4676 
4677 void CGOpenMPRuntime::emitCancellationPointCall(
4678     CodeGenFunction &CGF, SourceLocation Loc,
4679     OpenMPDirectiveKind CancelRegion) {
4680   if (!CGF.HaveInsertPoint())
4681     return;
4682   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4683   // global_tid, kmp_int32 cncl_kind);
4684   if (auto *OMPRegionInfo =
4685           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4686     if (OMPRegionInfo->hasCancel()) {
4687       llvm::Value *Args[] = {
4688           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4689           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4690       // Ignore return result until untied tasks are supported.
4691       auto *Result = CGF.EmitRuntimeCall(
4692           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4693       // if (__kmpc_cancellationpoint()) {
4694       //  __kmpc_cancel_barrier();
4695       //   exit from construct;
4696       // }
4697       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4698       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4699       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4700       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4701       CGF.EmitBlock(ExitBB);
4702       // __kmpc_cancel_barrier();
4703       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4704       // exit from construct;
4705       auto CancelDest =
4706           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4707       CGF.EmitBranchThroughCleanup(CancelDest);
4708       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4709     }
4710   }
4711 }
4712 
4713 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4714                                      const Expr *IfCond,
4715                                      OpenMPDirectiveKind CancelRegion) {
4716   if (!CGF.HaveInsertPoint())
4717     return;
4718   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4719   // kmp_int32 cncl_kind);
4720   if (auto *OMPRegionInfo =
4721           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4722     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4723                                                         PrePostActionTy &) {
4724       auto &RT = CGF.CGM.getOpenMPRuntime();
4725       llvm::Value *Args[] = {
4726           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4727           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4728       // Ignore return result until untied tasks are supported.
4729       auto *Result = CGF.EmitRuntimeCall(
4730           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4731       // if (__kmpc_cancel()) {
4732       //  __kmpc_cancel_barrier();
4733       //   exit from construct;
4734       // }
4735       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4736       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4737       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4738       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4739       CGF.EmitBlock(ExitBB);
4740       // __kmpc_cancel_barrier();
4741       RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4742       // exit from construct;
4743       auto CancelDest =
4744           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4745       CGF.EmitBranchThroughCleanup(CancelDest);
4746       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4747     };
4748     if (IfCond)
4749       emitOMPIfClause(CGF, IfCond, ThenGen,
4750                       [](CodeGenFunction &, PrePostActionTy &) {});
4751     else {
4752       RegionCodeGenTy ThenRCG(ThenGen);
4753       ThenRCG(CGF);
4754     }
4755   }
4756 }
4757 
4758 /// \brief Obtain information that uniquely identifies a target entry. This
4759 /// consists of the file and device IDs as well as line number associated with
4760 /// the relevant entry source location.
4761 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4762                                      unsigned &DeviceID, unsigned &FileID,
4763                                      unsigned &LineNum) {
4764 
4765   auto &SM = C.getSourceManager();
4766 
4767   // The loc should be always valid and have a file ID (the user cannot use
4768   // #pragma directives in macros)
4769 
4770   assert(Loc.isValid() && "Source location is expected to be always valid.");
4771   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4772 
4773   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4774   assert(PLoc.isValid() && "Source location is expected to be always valid.");
4775 
4776   llvm::sys::fs::UniqueID ID;
4777   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4778     llvm_unreachable("Source file with target region no longer exists!");
4779 
4780   DeviceID = ID.getDevice();
4781   FileID = ID.getFile();
4782   LineNum = PLoc.getLine();
4783 }
4784 
/// Emit the outlined function for the target directive \a D. After checking
/// that \a ParentName is not empty, all arguments are forwarded unchanged to
/// emitTargetOutlinedFunctionHelper, which sets \a OutlinedFn and, for
/// offload entries, \a OutlinedFnID.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // The parent name becomes part of the generated entry function name.
  assert(!ParentName.empty() && "Invalid target region parent name!");

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
4794 
4795 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4796     const OMPExecutableDirective &D, StringRef ParentName,
4797     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4798     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4799   // Create a unique name for the entry function using the source location
4800   // information of the current target region. The name will be something like:
4801   //
4802   // __omp_offloading_DD_FFFF_PP_lBB
4803   //
4804   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4805   // mangled name of the function that encloses the target region and BB is the
4806   // line number of the target region.
4807 
4808   unsigned DeviceID;
4809   unsigned FileID;
4810   unsigned Line;
4811   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4812                            Line);
4813   SmallString<64> EntryFnName;
4814   {
4815     llvm::raw_svector_ostream OS(EntryFnName);
4816     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4817        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4818   }
4819 
4820   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4821 
4822   CodeGenFunction CGF(CGM, true);
4823   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4824   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4825 
4826   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
4827 
4828   // If this target outline function is not an offload entry, we don't need to
4829   // register it.
4830   if (!IsOffloadEntry)
4831     return;
4832 
4833   // The target region ID is used by the runtime library to identify the current
4834   // target region, so it only has to be unique and not necessarily point to
4835   // anything. It could be the pointer to the outlined function that implements
4836   // the target region, but we aren't using that so that the compiler doesn't
4837   // need to keep that, and could therefore inline the host function if proven
4838   // worthwhile during optimization. In the other hand, if emitting code for the
4839   // device, the ID has to be the function address so that it can retrieved from
4840   // the offloading entry and launched by the runtime library. We also mark the
4841   // outlined function to have external linkage in case we are emitting code for
4842   // the device, because these functions will be entry points to the device.
4843 
4844   if (CGM.getLangOpts().OpenMPIsDevice) {
4845     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4846     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4847   } else
4848     OutlinedFnID = new llvm::GlobalVariable(
4849         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4850         llvm::GlobalValue::PrivateLinkage,
4851         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4852 
4853   // Register the information for the entry associated with this target region.
4854   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4855       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4856 }
4857 
4858 /// discard all CompoundStmts intervening between two constructs
4859 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
4860   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
4861     Body = CS->body_front();
4862 
4863   return Body;
4864 }
4865 
4866 /// \brief Emit the num_teams clause of an enclosed teams directive at the
4867 /// target region scope. If there is no teams directive associated with the
4868 /// target directive, or if there is no num_teams clause associated with the
4869 /// enclosed teams directive, return nullptr.
4870 static llvm::Value *
4871 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4872                                      CodeGenFunction &CGF,
4873                                      const OMPExecutableDirective &D) {
4874 
4875   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4876                                               "teams directive expected to be "
4877                                               "emitted only for the host!");
4878 
4879   // FIXME: For the moment we do not support combined directives with target and
4880   // teams, so we do not expect to get any num_teams clause in the provided
4881   // directive. Once we support that, this assertion can be replaced by the
4882   // actual emission of the clause expression.
4883   assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4884          "Not expecting clause in directive.");
4885 
4886   // If the current target region has a teams region enclosed, we need to get
4887   // the number of teams to pass to the runtime function call. This is done
4888   // by generating the expression in a inlined region. This is required because
4889   // the expression is captured in the enclosing target environment when the
4890   // teams directive is not combined with target.
4891 
4892   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4893 
4894   // FIXME: Accommodate other combined directives with teams when they become
4895   // available.
4896   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4897           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4898     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4899       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4900       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4901       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4902       return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4903                                        /*IsSigned=*/true);
4904     }
4905 
4906     // If we have an enclosed teams directive but no num_teams clause we use
4907     // the default value 0.
4908     return CGF.Builder.getInt32(0);
4909   }
4910 
4911   // No teams associated with the directive.
4912   return nullptr;
4913 }
4914 
4915 /// \brief Emit the thread_limit clause of an enclosed teams directive at the
4916 /// target region scope. If there is no teams directive associated with the
4917 /// target directive, or if there is no thread_limit clause associated with the
4918 /// enclosed teams directive, return nullptr.
4919 static llvm::Value *
4920 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4921                                         CodeGenFunction &CGF,
4922                                         const OMPExecutableDirective &D) {
4923 
4924   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4925                                               "teams directive expected to be "
4926                                               "emitted only for the host!");
4927 
4928   // FIXME: For the moment we do not support combined directives with target and
4929   // teams, so we do not expect to get any thread_limit clause in the provided
4930   // directive. Once we support that, this assertion can be replaced by the
4931   // actual emission of the clause expression.
4932   assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4933          "Not expecting clause in directive.");
4934 
4935   // If the current target region has a teams region enclosed, we need to get
4936   // the thread limit to pass to the runtime function call. This is done
4937   // by generating the expression in a inlined region. This is required because
4938   // the expression is captured in the enclosing target environment when the
4939   // teams directive is not combined with target.
4940 
4941   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4942 
4943   // FIXME: Accommodate other combined directives with teams when they become
4944   // available.
4945   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4946           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4947     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4948       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4949       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4950       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4951       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4952                                        /*IsSigned=*/true);
4953     }
4954 
4955     // If we have an enclosed teams directive but no thread_limit clause we use
4956     // the default value 0.
4957     return CGF.Builder.getInt32(0);
4958   }
4959 
4960   // No teams associated with the directive.
4961   return nullptr;
4962 }
4963 
4964 namespace {
/// \brief Utility to handle information from clauses associated with a given
/// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
/// It provides a convenient interface to obtain the information and generate
/// code for that information.
4969 class MappableExprsHandler {
4970 public:
4971   /// \brief Values for bit flags used to specify the mapping type for
4972   /// offloading.
4973   enum OpenMPOffloadMappingFlags {
4974     /// \brief Allocate memory on the device and move data from host to device.
4975     OMP_MAP_TO = 0x01,
4976     /// \brief Allocate memory on the device and move data from device to host.
4977     OMP_MAP_FROM = 0x02,
4978     /// \brief Always perform the requested mapping action on the element, even
4979     /// if it was already mapped before.
4980     OMP_MAP_ALWAYS = 0x04,
4981     /// \brief Delete the element from the device environment, ignoring the
4982     /// current reference count associated with the element.
4983     OMP_MAP_DELETE = 0x08,
4984     /// \brief The element being mapped is a pointer, therefore the pointee
4985     /// should be mapped as well.
4986     OMP_MAP_IS_PTR = 0x10,
4987     /// \brief This flags signals that an argument is the first one relating to
4988     /// a map/private clause expression. For some cases a single
4989     /// map/privatization results in multiple arguments passed to the runtime
4990     /// library.
4991     OMP_MAP_FIRST_REF = 0x20,
4992     /// \brief Signal that the runtime library has to return the device pointer
4993     /// in the current position for the data being mapped.
4994     OMP_MAP_RETURN_PTR = 0x40,
4995     /// \brief This flag signals that the reference being passed is a pointer to
4996     /// private data.
4997     OMP_MAP_PRIVATE_PTR = 0x80,
4998     /// \brief Pass the element to the device by value.
4999     OMP_MAP_PRIVATE_VAL = 0x100,
5000   };
5001 
5002   /// Class that associates information with a base pointer to be passed to the
5003   /// runtime library.
5004   class BasePointerInfo {
5005     /// The base pointer.
5006     llvm::Value *Ptr = nullptr;
5007     /// The base declaration that refers to this device pointer, or null if
5008     /// there is none.
5009     const ValueDecl *DevPtrDecl = nullptr;
5010 
5011   public:
5012     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
5013         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
5014     llvm::Value *operator*() const { return Ptr; }
5015     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
5016     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
5017   };
5018 
5019   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
5020   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
5021   typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
5022 
private:
  /// \brief Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// \brief Function the directive is being generated for. The private
  /// helpers of this class emit addresses, sizes and length expressions
  /// through it.
  CodeGenFunction &CGF;

  /// \brief Set of all first private variables in the current directive.
  /// Consulted by adjustMapModifiersForPrivateClauses.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
5039 
5040   llvm::Value *getExprTypeSize(const Expr *E) const {
5041     auto ExprTy = E->getType().getCanonicalType();
5042 
5043     // Reference types are ignored for mapping purposes.
5044     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
5045       ExprTy = RefTy->getPointeeType().getCanonicalType();
5046 
5047     // Given that an array section is considered a built-in type, we need to
5048     // do the calculation based on the length of the section instead of relying
5049     // on CGF.getTypeSize(E->getType()).
5050     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
5051       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
5052                             OAE->getBase()->IgnoreParenImpCasts())
5053                             .getCanonicalType();
5054 
5055       // If there is no length associated with the expression, that means we
5056       // are using the whole length of the base.
5057       if (!OAE->getLength() && OAE->getColonLoc().isValid())
5058         return CGF.getTypeSize(BaseTy);
5059 
5060       llvm::Value *ElemSize;
5061       if (auto *PTy = BaseTy->getAs<PointerType>())
5062         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
5063       else {
5064         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
5065         assert(ATy && "Expecting array type if not a pointer type.");
5066         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
5067       }
5068 
5069       // If we don't have a length at this point, that is because we have an
5070       // array section with a single element.
5071       if (!OAE->getLength())
5072         return ElemSize;
5073 
5074       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
5075       LengthVal =
5076           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
5077       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
5078     }
5079     return CGF.getTypeSize(ExprTy);
5080   }
5081 
5082   /// \brief Return the corresponding bits for a given map clause modifier. Add
5083   /// a flag marking the map as a pointer if requested. Add a flag marking the
5084   /// map as the first one of a series of maps that relate to the same map
5085   /// expression.
5086   unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
5087                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
5088                           bool AddIsFirstFlag) const {
5089     unsigned Bits = 0u;
5090     switch (MapType) {
5091     case OMPC_MAP_alloc:
5092     case OMPC_MAP_release:
5093       // alloc and release is the default behavior in the runtime library,  i.e.
5094       // if we don't pass any bits alloc/release that is what the runtime is
5095       // going to do. Therefore, we don't need to signal anything for these two
5096       // type modifiers.
5097       break;
5098     case OMPC_MAP_to:
5099       Bits = OMP_MAP_TO;
5100       break;
5101     case OMPC_MAP_from:
5102       Bits = OMP_MAP_FROM;
5103       break;
5104     case OMPC_MAP_tofrom:
5105       Bits = OMP_MAP_TO | OMP_MAP_FROM;
5106       break;
5107     case OMPC_MAP_delete:
5108       Bits = OMP_MAP_DELETE;
5109       break;
5110     default:
5111       llvm_unreachable("Unexpected map type!");
5112       break;
5113     }
5114     if (AddPtrFlag)
5115       Bits |= OMP_MAP_IS_PTR;
5116     if (AddIsFirstFlag)
5117       Bits |= OMP_MAP_FIRST_REF;
5118     if (MapTypeModifier == OMPC_MAP_always)
5119       Bits |= OMP_MAP_ALWAYS;
5120     return Bits;
5121   }
5122 
5123   /// \brief Return true if the provided expression is a final array section. A
5124   /// final array section, is one whose length can't be proved to be one.
5125   bool isFinalArraySectionExpression(const Expr *E) const {
5126     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
5127 
5128     // It is not an array section and therefore not a unity-size one.
5129     if (!OASE)
5130       return false;
5131 
5132     // An array section with no colon always refer to a single element.
5133     if (OASE->getColonLoc().isInvalid())
5134       return false;
5135 
5136     auto *Length = OASE->getLength();
5137 
5138     // If we don't have a length we have to check if the array has size 1
5139     // for this dimension. Also, we should always expect a length if the
5140     // base type is pointer.
5141     if (!Length) {
5142       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
5143                          OASE->getBase()->IgnoreParenImpCasts())
5144                          .getCanonicalType();
5145       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
5146         return ATy->getSize().getSExtValue() != 1;
5147       // If we don't have a constant dimension length, we have to consider
5148       // the current section as having any size, so it is not necessarily
5149       // unitary. If it happen to be unity size, that's user fault.
5150       return true;
5151     }
5152 
5153     // Check if the length evaluates to 1.
5154     llvm::APSInt ConstLength;
5155     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
5156       return true; // Can have more that size 1.
5157 
5158     return ConstLength.getSExtValue() != 1;
5159   }
5160 
  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param MapType Map type forwarded to getMapTypeBits for each generated
  /// entry of the mapped expression itself.
  /// \param MapTypeModifier Map type modifier forwarded to getMapTypeBits.
  /// \param Components Components of the mapped expression; they are scanned
  /// in reverse order, i.e. from the base to the complete expression.
  /// \param BasePointers [out] Receives one base pointer per generated entry.
  /// \param Pointers [out] Receives one section pointer per generated entry.
  /// \param Sizes [out] Receives one size per generated entry.
  /// \param Types [out] Receives the map type bits of each generated entry.
  /// The four output arrays are always appended to together.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22])
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
    // extra_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    // Base pointer of the entry currently being generated.
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
               .getPointer();

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         Ty->castAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      // Components that satisfy none of these conditions generate no entry
      // and are simply skipped.
      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // Section pointer (lower bound) and size of the current component.
        auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        // If we have a member expression and the current component is a
        // reference, we have to map the reference too. Whenever we have a
        // reference, the section that reference refers to is going to be a
        // load instruction from the storage assigned to the reference.
        if (isa<MemberExpr>(I->getAssociatedExpression()) &&
            I->getAssociatedDeclaration()->getType()->isReferenceType()) {
          auto *LI = cast<llvm::LoadInst>(LB);
          auto *RefAddr = LI->getPointerOperand();

          // Extra entry for the reference itself: pointer-sized and mapped
          // with the 'alloc' map type.
          BasePointers.push_back(BP);
          Pointers.push_back(RefAddr);
          Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
          Types.push_back(getMapTypeBits(
              /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
              !IsExpressionFirstInfo, IsCaptureFirstInfo));
          IsExpressionFirstInfo = false;
          IsCaptureFirstInfo = false;
          // The reference will be the next base address.
          BP = RefAddr;
        }

        BasePointers.push_back(BP);
        Pointers.push_back(LB);
        Sizes.push_back(Size);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo,
                                       IsCaptureFirstInfo));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        continue;
      }
    }
  }
5409 
5410   /// \brief Return the adjusted map modifiers if the declaration a capture
5411   /// refers to appears in a first-private clause. This is expected to be used
5412   /// only with directives that start with 'target'.
5413   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
5414                                                unsigned CurrentModifiers) {
5415     assert(Cap.capturesVariable() && "Expected capture by reference only!");
5416 
5417     // A first private variable captured by reference will use only the
5418     // 'private ptr' and 'map to' flag. Return the right flags if the captured
5419     // declaration is known as first-private in this handler.
5420     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
5421       return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
5422              MappableExprsHandler::OMP_MAP_TO;
5423 
5424     // We didn't modify anything.
5425     return CurrentModifiers;
5426   }
5427 
5428 public:
5429   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
5430       : CurDir(Dir), CGF(CGF) {
5431     // Extract firstprivate clause information.
5432     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
5433       for (const auto *D : C->varlists())
5434         FirstPrivateDecls.insert(
5435             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
5436     // Extract device pointer clause information.
5437     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
5438       for (auto L : C->component_lists())
5439         DevPointersMap[L.first].push_back(L.second);
5440   }
5441 
  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the mappable expressions found in the 'map', 'to', 'from' and
  /// 'use_device_ptr' clauses of the current directive.
  ///
  /// The four output arrays are cleared first and then filled in lockstep (one
  /// element per generated map entry). For each entry that relates with a
  /// 'use_device_ptr' item, the relevant declaration is recorded on the
  /// corresponding \a BasePointers element and the 'return pointer' flag is
  /// added to the matching \a Types element.
  ///
  /// \param BasePointers [out] Base pointer of each map entry.
  /// \param Pointers [out] Section pointer of each map entry.
  /// \param Sizes [out] Size of each map entry.
  /// \param Types [out] Map type flags of each map entry.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // Start from empty output arrays.
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // Description of a single component list together with the map kind it
    // has to be emitted with.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      // Components of the mappable expression.
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      // Map type and modifier to use for this component list.
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      // Whether (and how) a device pointer has to be returned for this list.
      ReturnPointerKind ReturnDevicePointer;

      MapInfo()
          : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
            ReturnDevicePointer(RPK_None) {}
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The map is keyed by the canonical declaration; a null
    // declaration is kept as a null key.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
    };

    // Gather the component lists of the 'map' clauses (with their own map type
    // and modifier), and of the 'to' and 'from' clauses (which behave as maps
    // of type 'to' and 'from' with no modifier).
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None);

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with zero size section that only
    // carries the 'return pointer' and 'first reference' flags. It is the
    // user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration. The kind records whether the
          // pointer is the base itself, a member of 'this', or a member of
          // 'this' with reference type.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section. The loaded pointer value is used both as base
        // pointer and as section pointer.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr =
            this->CGF
                .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
                .getScalarVal();
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
      }

    // Emit the entries of every declaration, one component list at a time.
    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index, i.e. the index of the first
        // entry that is about to be generated for this component list.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
                                           L.Components, BasePointers, Pointers,
                                           Sizes, Types, IsFirstComponentList);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        // NOTE(review): the marking is only applied to the first component
        // list of a declaration, whereas the use_device_ptr lookup above may
        // select a later list (e.g. among the lists grouped under 'this') -
        // confirm that skipping those is intended.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
        }
        IsFirstComponentList = false;
      }
    }
  }
5603 
5604   /// \brief Generate the base pointers, section pointers, sizes and map types
5605   /// associated to a given capture.
5606   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
5607                               llvm::Value *Arg,
5608                               MapBaseValuesArrayTy &BasePointers,
5609                               MapValuesArrayTy &Pointers,
5610                               MapValuesArrayTy &Sizes,
5611                               MapFlagsArrayTy &Types) const {
5612     assert(!Cap->capturesVariableArrayType() &&
5613            "Not expecting to generate map info for a variable array type!");
5614 
5615     BasePointers.clear();
5616     Pointers.clear();
5617     Sizes.clear();
5618     Types.clear();
5619 
5620     // We need to know when we generating information for the first component
5621     // associated with a capture, because the mapping flags depend on it.
5622     bool IsFirstComponentList = true;
5623 
5624     const ValueDecl *VD =
5625         Cap->capturesThis()
5626             ? nullptr
5627             : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
5628 
5629     // If this declaration appears in a is_device_ptr clause we just have to
5630     // pass the pointer by value. If it is a reference to a declaration, we just
5631     // pass its value, otherwise, if it is a member expression, we need to map
5632     // 'to' the field.
5633     if (!VD) {
5634       auto It = DevPointersMap.find(VD);
5635       if (It != DevPointersMap.end()) {
5636         for (auto L : It->second) {
5637           generateInfoForComponentList(
5638               /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
5639               BasePointers, Pointers, Sizes, Types, IsFirstComponentList);
5640           IsFirstComponentList = false;
5641         }
5642         return;
5643       }
5644     } else if (DevPointersMap.count(VD)) {
5645       BasePointers.push_back({Arg, VD});
5646       Pointers.push_back(Arg);
5647       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
5648       Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
5649       return;
5650     }
5651 
5652     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
5653     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
5654       for (auto L : C->decl_component_lists(VD)) {
5655         assert(L.first == VD &&
5656                "We got information for the wrong declaration??");
5657         assert(!L.second.empty() &&
5658                "Not expecting declaration with no component lists.");
5659         generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
5660                                      L.second, BasePointers, Pointers, Sizes,
5661                                      Types, IsFirstComponentList);
5662         IsFirstComponentList = false;
5663       }
5664 
5665     return;
5666   }
5667 
5668   /// \brief Generate the default map information for a given capture \a CI,
5669   /// record field declaration \a RI and captured value \a CV.
5670   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
5671                               const FieldDecl &RI, llvm::Value *CV,
5672                               MapBaseValuesArrayTy &CurBasePointers,
5673                               MapValuesArrayTy &CurPointers,
5674                               MapValuesArrayTy &CurSizes,
5675                               MapFlagsArrayTy &CurMapTypes) {
5676 
5677     // Do the default mapping.
5678     if (CI.capturesThis()) {
5679       CurBasePointers.push_back(CV);
5680       CurPointers.push_back(CV);
5681       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
5682       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
5683       // Default map type.
5684       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
5685     } else if (CI.capturesVariableByCopy()) {
5686       CurBasePointers.push_back(CV);
5687       CurPointers.push_back(CV);
5688       if (!RI.getType()->isAnyPointerType()) {
5689         // We have to signal to the runtime captures passed by value that are
5690         // not pointers.
5691         CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
5692         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
5693       } else {
5694         // Pointers are implicitly mapped with a zero size and no flags
5695         // (other than first map that is added for all implicit maps).
5696         CurMapTypes.push_back(0u);
5697         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
5698       }
5699     } else {
5700       assert(CI.capturesVariable() && "Expected captured reference.");
5701       CurBasePointers.push_back(CV);
5702       CurPointers.push_back(CV);
5703 
5704       const ReferenceType *PtrTy =
5705           cast<ReferenceType>(RI.getType().getTypePtr());
5706       QualType ElementType = PtrTy->getPointeeType();
5707       CurSizes.push_back(CGF.getTypeSize(ElementType));
5708       // The default map type for a scalar/complex type is 'to' because by
5709       // default the value doesn't have to be retrieved. For an aggregate
5710       // type, the default is 'tofrom'.
5711       CurMapTypes.push_back(ElementType->isAggregateType()
5712                                 ? (OMP_MAP_TO | OMP_MAP_FROM)
5713                                 : OMP_MAP_TO);
5714 
5715       // If we have a capture by reference we may need to add the private
5716       // pointer flag if the base declaration shows in some first-private
5717       // clause.
5718       CurMapTypes.back() =
5719           adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
5720     }
5721     // Every default map produces a single argument, so, it is always the
5722     // first one.
5723     CurMapTypes.back() |= OMP_MAP_FIRST_REF;
5724   }
5725 };
5726 
/// \brief Device IDs with a reserved meaning when passed to the offloading
/// runtime library.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
5732 } // anonymous namespace
5733 
5734 /// \brief Emit the arrays used to pass the captures and map information to the
5735 /// offloading runtime library. If there is no map or capture information,
5736 /// return nullptr by reference.
5737 static void
5738 emitOffloadingArrays(CodeGenFunction &CGF,
5739                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
5740                      MappableExprsHandler::MapValuesArrayTy &Pointers,
5741                      MappableExprsHandler::MapValuesArrayTy &Sizes,
5742                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
5743                      CGOpenMPRuntime::TargetDataInfo &Info) {
5744   auto &CGM = CGF.CGM;
5745   auto &Ctx = CGF.getContext();
5746 
5747   // Reset the array information.
5748   Info.clearArrayInfo();
5749   Info.NumberOfPtrs = BasePointers.size();
5750 
5751   if (Info.NumberOfPtrs) {
5752     // Detect if we have any capture size requiring runtime evaluation of the
5753     // size so that a constant array could be eventually used.
5754     bool hasRuntimeEvaluationCaptureSize = false;
5755     for (auto *S : Sizes)
5756       if (!isa<llvm::Constant>(S)) {
5757         hasRuntimeEvaluationCaptureSize = true;
5758         break;
5759       }
5760 
5761     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
5762     QualType PointerArrayType =
5763         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
5764                                  /*IndexTypeQuals=*/0);
5765 
5766     Info.BasePointersArray =
5767         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
5768     Info.PointersArray =
5769         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
5770 
5771     // If we don't have any VLA types or other types that require runtime
5772     // evaluation, we can use a constant array for the map sizes, otherwise we
5773     // need to fill up the arrays as we do for the pointers.
5774     if (hasRuntimeEvaluationCaptureSize) {
5775       QualType SizeArrayType = Ctx.getConstantArrayType(
5776           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
5777           /*IndexTypeQuals=*/0);
5778       Info.SizesArray =
5779           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
5780     } else {
5781       // We expect all the sizes to be constant, so we collect them to create
5782       // a constant array.
5783       SmallVector<llvm::Constant *, 16> ConstSizes;
5784       for (auto S : Sizes)
5785         ConstSizes.push_back(cast<llvm::Constant>(S));
5786 
5787       auto *SizesArrayInit = llvm::ConstantArray::get(
5788           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
5789       auto *SizesArrayGbl = new llvm::GlobalVariable(
5790           CGM.getModule(), SizesArrayInit->getType(),
5791           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5792           SizesArrayInit, ".offload_sizes");
5793       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5794       Info.SizesArray = SizesArrayGbl;
5795     }
5796 
5797     // The map types are always constant so we don't need to generate code to
5798     // fill arrays. Instead, we create an array constant.
5799     llvm::Constant *MapTypesArrayInit =
5800         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
5801     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
5802         CGM.getModule(), MapTypesArrayInit->getType(),
5803         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5804         MapTypesArrayInit, ".offload_maptypes");
5805     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5806     Info.MapTypesArray = MapTypesArrayGbl;
5807 
5808     for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
5809       llvm::Value *BPVal = *BasePointers[i];
5810       if (BPVal->getType()->isPointerTy())
5811         BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
5812       else {
5813         assert(BPVal->getType()->isIntegerTy() &&
5814                "If not a pointer, the value type must be an integer.");
5815         BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
5816       }
5817       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
5818           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5819           Info.BasePointersArray, 0, i);
5820       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5821       CGF.Builder.CreateStore(BPVal, BPAddr);
5822 
5823       if (Info.requiresDevicePointerInfo())
5824         if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
5825           Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
5826 
5827       llvm::Value *PVal = Pointers[i];
5828       if (PVal->getType()->isPointerTy())
5829         PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
5830       else {
5831         assert(PVal->getType()->isIntegerTy() &&
5832                "If not a pointer, the value type must be an integer.");
5833         PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
5834       }
5835       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
5836           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5837           Info.PointersArray, 0, i);
5838       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5839       CGF.Builder.CreateStore(PVal, PAddr);
5840 
5841       if (hasRuntimeEvaluationCaptureSize) {
5842         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
5843             llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
5844             Info.SizesArray,
5845             /*Idx0=*/0,
5846             /*Idx1=*/i);
5847         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
5848         CGF.Builder.CreateStore(
5849             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
5850             SAddr);
5851       }
5852     }
5853   }
5854 }
5855 /// \brief Emit the arguments to be passed to the runtime library based on the
5856 /// arrays of pointers, sizes and map types.
5857 static void emitOffloadingArraysArgument(
5858     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
5859     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
5860     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
5861   auto &CGM = CGF.CGM;
5862   if (Info.NumberOfPtrs) {
5863     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5864         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5865         Info.BasePointersArray,
5866         /*Idx0=*/0, /*Idx1=*/0);
5867     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5868         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5869         Info.PointersArray,
5870         /*Idx0=*/0,
5871         /*Idx1=*/0);
5872     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5873         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
5874         /*Idx0=*/0, /*Idx1=*/0);
5875     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5876         llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
5877         Info.MapTypesArray,
5878         /*Idx0=*/0,
5879         /*Idx1=*/0);
5880   } else {
5881     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5882     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5883     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
5884     MapTypesArrayArg =
5885         llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
5886   }
5887 }
5888 
5889 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
5890                                      const OMPExecutableDirective &D,
5891                                      llvm::Value *OutlinedFn,
5892                                      llvm::Value *OutlinedFnID,
5893                                      const Expr *IfCond, const Expr *Device,
5894                                      ArrayRef<llvm::Value *> CapturedVars) {
5895   if (!CGF.HaveInsertPoint())
5896     return;
5897 
5898   assert(OutlinedFn && "Invalid outlined function!");
5899 
5900   auto &Ctx = CGF.getContext();
5901 
5902   // Fill up the arrays with all the captured variables.
5903   MappableExprsHandler::MapValuesArrayTy KernelArgs;
5904   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
5905   MappableExprsHandler::MapValuesArrayTy Pointers;
5906   MappableExprsHandler::MapValuesArrayTy Sizes;
5907   MappableExprsHandler::MapFlagsArrayTy MapTypes;
5908 
5909   MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
5910   MappableExprsHandler::MapValuesArrayTy CurPointers;
5911   MappableExprsHandler::MapValuesArrayTy CurSizes;
5912   MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
5913 
5914   // Get mappable expression information.
5915   MappableExprsHandler MEHandler(D, CGF);
5916 
5917   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5918   auto RI = CS.getCapturedRecordDecl()->field_begin();
5919   auto CV = CapturedVars.begin();
5920   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
5921                                             CE = CS.capture_end();
5922        CI != CE; ++CI, ++RI, ++CV) {
5923     StringRef Name;
5924     QualType Ty;
5925 
5926     CurBasePointers.clear();
5927     CurPointers.clear();
5928     CurSizes.clear();
5929     CurMapTypes.clear();
5930 
5931     // VLA sizes are passed to the outlined region by copy and do not have map
5932     // information associated.
5933     if (CI->capturesVariableArrayType()) {
5934       CurBasePointers.push_back(*CV);
5935       CurPointers.push_back(*CV);
5936       CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5937       // Copy to the device as an argument. No need to retrieve it.
5938       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
5939                             MappableExprsHandler::OMP_MAP_FIRST_REF);
5940     } else {
5941       // If we have any information in the map clause, we use it, otherwise we
5942       // just do a default mapping.
5943       MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
5944                                        CurSizes, CurMapTypes);
5945       if (CurBasePointers.empty())
5946         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
5947                                          CurPointers, CurSizes, CurMapTypes);
5948     }
5949     // We expect to have at least an element of information for this capture.
5950     assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
5951     assert(CurBasePointers.size() == CurPointers.size() &&
5952            CurBasePointers.size() == CurSizes.size() &&
5953            CurBasePointers.size() == CurMapTypes.size() &&
5954            "Inconsistent map information sizes!");
5955 
5956     // The kernel args are always the first elements of the base pointers
5957     // associated with a capture.
5958     KernelArgs.push_back(*CurBasePointers.front());
5959     // We need to append the results of this capture to what we already have.
5960     BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
5961     Pointers.append(CurPointers.begin(), CurPointers.end());
5962     Sizes.append(CurSizes.begin(), CurSizes.end());
5963     MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
5964   }
5965 
5966   // Keep track on whether the host function has to be executed.
5967   auto OffloadErrorQType =
5968       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
5969   auto OffloadError = CGF.MakeAddrLValue(
5970       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
5971       OffloadErrorQType);
5972   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
5973                         OffloadError);
5974 
5975   // Fill up the pointer arrays and transfer execution to the device.
5976   auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
5977                     OutlinedFnID, OffloadError, OffloadErrorQType,
5978                     &D](CodeGenFunction &CGF, PrePostActionTy &) {
5979     auto &RT = CGF.CGM.getOpenMPRuntime();
5980     // Emit the offloading arrays.
5981     TargetDataInfo Info;
5982     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
5983     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
5984                                  Info.PointersArray, Info.SizesArray,
5985                                  Info.MapTypesArray, Info);
5986 
5987     // On top of the arrays that were filled up, the target offloading call
5988     // takes as arguments the device id as well as the host pointer. The host
5989     // pointer is used by the runtime library to identify the current target
5990     // region, so it only has to be unique and not necessarily point to
5991     // anything. It could be the pointer to the outlined function that
5992     // implements the target region, but we aren't using that so that the
5993     // compiler doesn't need to keep that, and could therefore inline the host
5994     // function if proven worthwhile during optimization.
5995 
5996     // From this point on, we need to have an ID of the target region defined.
5997     assert(OutlinedFnID && "Invalid outlined function ID!");
5998 
5999     // Emit device ID if any.
6000     llvm::Value *DeviceID;
6001     if (Device)
6002       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6003                                            CGF.Int32Ty, /*isSigned=*/true);
6004     else
6005       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6006 
6007     // Emit the number of elements in the offloading arrays.
6008     llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6009 
6010     // Return value of the runtime offloading call.
6011     llvm::Value *Return;
6012 
6013     auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
6014     auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
6015 
6016     // If we have NumTeams defined this means that we have an enclosed teams
6017     // region. Therefore we also expect to have ThreadLimit defined. These two
6018     // values should be defined in the presence of a teams directive, regardless
6019     // of having any clauses associated. If the user is using teams but no
6020     // clauses, these two values will be the default that should be passed to
6021     // the runtime library - a 32-bit integer with the value zero.
6022     if (NumTeams) {
6023       assert(ThreadLimit && "Thread limit expression should be available along "
6024                             "with number of teams.");
6025       llvm::Value *OffloadingArgs[] = {
6026           DeviceID,           OutlinedFnID,
6027           PointerNum,         Info.BasePointersArray,
6028           Info.PointersArray, Info.SizesArray,
6029           Info.MapTypesArray, NumTeams,
6030           ThreadLimit};
6031       Return = CGF.EmitRuntimeCall(
6032           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
6033     } else {
6034       llvm::Value *OffloadingArgs[] = {
6035           DeviceID,           OutlinedFnID,
6036           PointerNum,         Info.BasePointersArray,
6037           Info.PointersArray, Info.SizesArray,
6038           Info.MapTypesArray};
6039       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
6040                                    OffloadingArgs);
6041     }
6042 
6043     CGF.EmitStoreOfScalar(Return, OffloadError);
6044   };
6045 
6046   // Notify that the host version must be executed.
6047   auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
6048     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
6049                           OffloadError);
6050   };
6051 
6052   // If we have a target function ID it means that we need to support
6053   // offloading, otherwise, just execute on the host. We need to execute on host
6054   // regardless of the conditional in the if clause if, e.g., the user do not
6055   // specify target triples.
6056   if (OutlinedFnID) {
6057     if (IfCond)
6058       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6059     else {
6060       RegionCodeGenTy ThenRCG(ThenGen);
6061       ThenRCG(CGF);
6062     }
6063   } else {
6064     RegionCodeGenTy ElseRCG(ElseGen);
6065     ElseRCG(CGF);
6066   }
6067 
6068   // Check the error code and execute the host version if required.
6069   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
6070   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
6071   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
6072   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
6073   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
6074 
6075   CGF.EmitBlock(OffloadFailedBlock);
6076   CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
6077   CGF.EmitBranch(OffloadContBlock);
6078 
6079   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
6080 }
6081 
6082 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
6083                                                     StringRef ParentName) {
6084   if (!S)
6085     return;
6086 
6087   // If we find a OMP target directive, codegen the outline function and
6088   // register the result.
6089   // FIXME: Add other directives with target when they become supported.
6090   bool isTargetDirective = isa<OMPTargetDirective>(S);
6091 
6092   if (isTargetDirective) {
6093     auto *E = cast<OMPExecutableDirective>(S);
6094     unsigned DeviceID;
6095     unsigned FileID;
6096     unsigned Line;
6097     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
6098                              FileID, Line);
6099 
6100     // Is this a target region that should not be emitted as an entry point? If
6101     // so just signal we are done with this target region.
6102     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
6103                                                             ParentName, Line))
6104       return;
6105 
6106     llvm::Function *Fn;
6107     llvm::Constant *Addr;
6108     std::tie(Fn, Addr) =
6109         CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
6110             CGM, cast<OMPTargetDirective>(*E), ParentName,
6111             /*isOffloadEntry=*/true);
6112     assert(Fn && Addr && "Target region emission failed.");
6113     return;
6114   }
6115 
6116   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
6117     if (!E->hasAssociatedStmt())
6118       return;
6119 
6120     scanForTargetRegionsFunctions(
6121         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
6122         ParentName);
6123     return;
6124   }
6125 
6126   // If this is a lambda function, look into its body.
6127   if (auto *L = dyn_cast<LambdaExpr>(S))
6128     S = L->getBody();
6129 
6130   // Keep looking for target regions recursively.
6131   for (auto *II : S->children())
6132     scanForTargetRegionsFunctions(II, ParentName);
6133 }
6134 
6135 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
6136   auto &FD = *cast<FunctionDecl>(GD.getDecl());
6137 
6138   // If emitting code for the host, we do not process FD here. Instead we do
6139   // the normal code generation.
6140   if (!CGM.getLangOpts().OpenMPIsDevice)
6141     return false;
6142 
6143   // Try to detect target regions in the function.
6144   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
6145 
6146   // We should not emit any function othen that the ones created during the
6147   // scanning. Therefore, we signal that this function is completely dealt
6148   // with.
6149   return true;
6150 }
6151 
6152 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
6153   if (!CGM.getLangOpts().OpenMPIsDevice)
6154     return false;
6155 
6156   // Check if there are Ctors/Dtors in this declaration and look for target
6157   // regions in it. We use the complete variant to produce the kernel name
6158   // mangling.
6159   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
6160   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
6161     for (auto *Ctor : RD->ctors()) {
6162       StringRef ParentName =
6163           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
6164       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
6165     }
6166     auto *Dtor = RD->getDestructor();
6167     if (Dtor) {
6168       StringRef ParentName =
6169           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
6170       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
6171     }
6172   }
6173 
6174   // If we are in target mode we do not emit any global (declare target is not
6175   // implemented yet). Therefore we signal that GD was processed in this case.
6176   return true;
6177 }
6178 
6179 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
6180   auto *VD = GD.getDecl();
6181   if (isa<FunctionDecl>(VD))
6182     return emitTargetFunctions(GD);
6183 
6184   return emitTargetGlobalVariable(GD);
6185 }
6186 
6187 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
6188   // If we have offloading in the current module, we need to emit the entries
6189   // now and register the offloading descriptor.
6190   createOffloadEntriesAndInfoMetadata();
6191 
6192   // Create and register the offloading binary descriptors. This is the main
6193   // entity that captures all the information about offloading in the current
6194   // compilation unit.
6195   return createOffloadingBinaryDescriptorRegistration();
6196 }
6197 
6198 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
6199                                     const OMPExecutableDirective &D,
6200                                     SourceLocation Loc,
6201                                     llvm::Value *OutlinedFn,
6202                                     ArrayRef<llvm::Value *> CapturedVars) {
6203   if (!CGF.HaveInsertPoint())
6204     return;
6205 
6206   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6207   CodeGenFunction::RunCleanupsScope Scope(CGF);
6208 
6209   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
6210   llvm::Value *Args[] = {
6211       RTLoc,
6212       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
6213       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
6214   llvm::SmallVector<llvm::Value *, 16> RealArgs;
6215   RealArgs.append(std::begin(Args), std::end(Args));
6216   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
6217 
6218   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
6219   CGF.EmitRuntimeCall(RTLFn, RealArgs);
6220 }
6221 
6222 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
6223                                          const Expr *NumTeams,
6224                                          const Expr *ThreadLimit,
6225                                          SourceLocation Loc) {
6226   if (!CGF.HaveInsertPoint())
6227     return;
6228 
6229   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6230 
6231   llvm::Value *NumTeamsVal =
6232       (NumTeams)
6233           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
6234                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6235           : CGF.Builder.getInt32(0);
6236 
6237   llvm::Value *ThreadLimitVal =
6238       (ThreadLimit)
6239           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
6240                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6241           : CGF.Builder.getInt32(0);
6242 
6243   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
6244   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
6245                                      ThreadLimitVal};
6246   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
6247                       PushNumTeamsArgs);
6248 }
6249 
6250 void CGOpenMPRuntime::emitTargetDataCalls(
6251     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6252     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
6253   if (!CGF.HaveInsertPoint())
6254     return;
6255 
6256   // Action used to replace the default codegen action and turn privatization
6257   // off.
6258   PrePostActionTy NoPrivAction;
6259 
6260   // Generate the code for the opening of the data environment. Capture all the
6261   // arguments of the runtime call by reference because they are used in the
6262   // closing of the region.
6263   auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
6264       CodeGenFunction &CGF, PrePostActionTy &) {
6265     // Fill up the arrays with all the mapped variables.
6266     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6267     MappableExprsHandler::MapValuesArrayTy Pointers;
6268     MappableExprsHandler::MapValuesArrayTy Sizes;
6269     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6270 
6271     // Get map clause information.
6272     MappableExprsHandler MCHandler(D, CGF);
6273     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6274 
6275     // Fill up the arrays and create the arguments.
6276     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6277 
6278     llvm::Value *BasePointersArrayArg = nullptr;
6279     llvm::Value *PointersArrayArg = nullptr;
6280     llvm::Value *SizesArrayArg = nullptr;
6281     llvm::Value *MapTypesArrayArg = nullptr;
6282     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6283                                  SizesArrayArg, MapTypesArrayArg, Info);
6284 
6285     // Emit device ID if any.
6286     llvm::Value *DeviceID = nullptr;
6287     if (Device)
6288       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6289                                            CGF.Int32Ty, /*isSigned=*/true);
6290     else
6291       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6292 
6293     // Emit the number of elements in the offloading arrays.
6294     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
6295 
6296     llvm::Value *OffloadingArgs[] = {
6297         DeviceID,         PointerNum,    BasePointersArrayArg,
6298         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6299     auto &RT = CGF.CGM.getOpenMPRuntime();
6300     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
6301                         OffloadingArgs);
6302 
6303     // If device pointer privatization is required, emit the body of the region
6304     // here. It will have to be duplicated: with and without privatization.
6305     if (!Info.CaptureDeviceAddrMap.empty())
6306       CodeGen(CGF);
6307   };
6308 
6309   // Generate code for the closing of the data region.
6310   auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
6311                                             PrePostActionTy &) {
6312     assert(Info.isValid() && "Invalid data environment closing arguments.");
6313 
6314     llvm::Value *BasePointersArrayArg = nullptr;
6315     llvm::Value *PointersArrayArg = nullptr;
6316     llvm::Value *SizesArrayArg = nullptr;
6317     llvm::Value *MapTypesArrayArg = nullptr;
6318     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6319                                  SizesArrayArg, MapTypesArrayArg, Info);
6320 
6321     // Emit device ID if any.
6322     llvm::Value *DeviceID = nullptr;
6323     if (Device)
6324       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6325                                            CGF.Int32Ty, /*isSigned=*/true);
6326     else
6327       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6328 
6329     // Emit the number of elements in the offloading arrays.
6330     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
6331 
6332     llvm::Value *OffloadingArgs[] = {
6333         DeviceID,         PointerNum,    BasePointersArrayArg,
6334         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6335     auto &RT = CGF.CGM.getOpenMPRuntime();
6336     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
6337                         OffloadingArgs);
6338   };
6339 
6340   // If we need device pointer privatization, we need to emit the body of the
6341   // region with no privatization in the 'else' branch of the conditional.
6342   // Otherwise, we don't have to do anything.
6343   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
6344                                                          PrePostActionTy &) {
6345     if (!Info.CaptureDeviceAddrMap.empty()) {
6346       CodeGen.setAction(NoPrivAction);
6347       CodeGen(CGF);
6348     }
6349   };
6350 
6351   // We don't have to do anything to close the region if the if clause evaluates
6352   // to false.
6353   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6354 
6355   if (IfCond) {
6356     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
6357   } else {
6358     RegionCodeGenTy RCG(BeginThenGen);
6359     RCG(CGF);
6360   }
6361 
6362   // If we don't require privatization of device pointers, we emit the body in
6363   // between the runtime calls. This avoids duplicating the body code.
6364   if (Info.CaptureDeviceAddrMap.empty()) {
6365     CodeGen.setAction(NoPrivAction);
6366     CodeGen(CGF);
6367   }
6368 
6369   if (IfCond) {
6370     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
6371   } else {
6372     RegionCodeGenTy RCG(EndThenGen);
6373     RCG(CGF);
6374   }
6375 }
6376 
6377 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
6378     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6379     const Expr *Device) {
6380   if (!CGF.HaveInsertPoint())
6381     return;
6382 
6383   assert((isa<OMPTargetEnterDataDirective>(D) ||
6384           isa<OMPTargetExitDataDirective>(D) ||
6385           isa<OMPTargetUpdateDirective>(D)) &&
6386          "Expecting either target enter, exit data, or update directives.");
6387 
6388   // Generate the code for the opening of the data environment.
6389   auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
6390     // Fill up the arrays with all the mapped variables.
6391     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6392     MappableExprsHandler::MapValuesArrayTy Pointers;
6393     MappableExprsHandler::MapValuesArrayTy Sizes;
6394     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6395 
6396     // Get map clause information.
6397     MappableExprsHandler MEHandler(D, CGF);
6398     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6399 
6400     // Fill up the arrays and create the arguments.
6401     TargetDataInfo Info;
6402     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6403     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
6404                                  Info.PointersArray, Info.SizesArray,
6405                                  Info.MapTypesArray, Info);
6406 
6407     // Emit device ID if any.
6408     llvm::Value *DeviceID = nullptr;
6409     if (Device)
6410       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6411                                            CGF.Int32Ty, /*isSigned=*/true);
6412     else
6413       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6414 
6415     // Emit the number of elements in the offloading arrays.
6416     auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6417 
6418     llvm::Value *OffloadingArgs[] = {
6419         DeviceID,           PointerNum,      Info.BasePointersArray,
6420         Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
6421 
6422     auto &RT = CGF.CGM.getOpenMPRuntime();
6423     // Select the right runtime function call for each expected standalone
6424     // directive.
6425     OpenMPRTLFunction RTLFn;
6426     switch (D.getDirectiveKind()) {
6427     default:
6428       llvm_unreachable("Unexpected standalone target data directive.");
6429       break;
6430     case OMPD_target_enter_data:
6431       RTLFn = OMPRTL__tgt_target_data_begin;
6432       break;
6433     case OMPD_target_exit_data:
6434       RTLFn = OMPRTL__tgt_target_data_end;
6435       break;
6436     case OMPD_target_update:
6437       RTLFn = OMPRTL__tgt_target_data_update;
6438       break;
6439     }
6440     CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
6441   };
6442 
6443   // In the event we get an if clause, we don't have to take any action on the
6444   // else side.
6445   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6446 
6447   if (IfCond) {
6448     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6449   } else {
6450     RegionCodeGenTy ThenGenRCG(ThenGen);
6451     ThenGenRCG(CGF);
6452   }
6453 }
6454 
6455 namespace {
6456   /// Kind of parameter in a function with 'declare simd' directive.
6457   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
6458   /// Attribute set of the parameter.
6459   struct ParamAttrTy {
6460     ParamKindTy Kind = Vector;
6461     llvm::APSInt StrideOrArg;
6462     llvm::APSInt Alignment;
6463   };
6464 } // namespace
6465 
6466 static unsigned evaluateCDTSize(const FunctionDecl *FD,
6467                                 ArrayRef<ParamAttrTy> ParamAttrs) {
6468   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
6469   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
6470   // of that clause. The VLEN value must be power of 2.
6471   // In other case the notion of the function`s "characteristic data type" (CDT)
6472   // is used to compute the vector length.
6473   // CDT is defined in the following order:
6474   //   a) For non-void function, the CDT is the return type.
6475   //   b) If the function has any non-uniform, non-linear parameters, then the
6476   //   CDT is the type of the first such parameter.
6477   //   c) If the CDT determined by a) or b) above is struct, union, or class
6478   //   type which is pass-by-value (except for the type that maps to the
6479   //   built-in complex data type), the characteristic data type is int.
6480   //   d) If none of the above three cases is applicable, the CDT is int.
6481   // The VLEN is then determined based on the CDT and the size of vector
6482   // register of that ISA for which current vector version is generated. The
6483   // VLEN is computed using the formula below:
6484   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
6485   // where vector register size specified in section 3.2.1 Registers and the
6486   // Stack Frame of original AMD64 ABI document.
6487   QualType RetType = FD->getReturnType();
6488   if (RetType.isNull())
6489     return 0;
6490   ASTContext &C = FD->getASTContext();
6491   QualType CDT;
6492   if (!RetType.isNull() && !RetType->isVoidType())
6493     CDT = RetType;
6494   else {
6495     unsigned Offset = 0;
6496     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
6497       if (ParamAttrs[Offset].Kind == Vector)
6498         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
6499       ++Offset;
6500     }
6501     if (CDT.isNull()) {
6502       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
6503         if (ParamAttrs[I + Offset].Kind == Vector) {
6504           CDT = FD->getParamDecl(I)->getType();
6505           break;
6506         }
6507       }
6508     }
6509   }
6510   if (CDT.isNull())
6511     CDT = C.IntTy;
6512   CDT = CDT->getCanonicalTypeUnqualified();
6513   if (CDT->isRecordType() || CDT->isUnionType())
6514     CDT = C.IntTy;
6515   return C.getTypeSize(CDT);
6516 }
6517 
6518 static void
6519 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
6520                            const llvm::APSInt &VLENVal,
6521                            ArrayRef<ParamAttrTy> ParamAttrs,
6522                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
6523   struct ISADataTy {
6524     char ISA;
6525     unsigned VecRegSize;
6526   };
6527   ISADataTy ISAData[] = {
6528       {
6529           'b', 128
6530       }, // SSE
6531       {
6532           'c', 256
6533       }, // AVX
6534       {
6535           'd', 256
6536       }, // AVX2
6537       {
6538           'e', 512
6539       }, // AVX512
6540   };
6541   llvm::SmallVector<char, 2> Masked;
6542   switch (State) {
6543   case OMPDeclareSimdDeclAttr::BS_Undefined:
6544     Masked.push_back('N');
6545     Masked.push_back('M');
6546     break;
6547   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
6548     Masked.push_back('N');
6549     break;
6550   case OMPDeclareSimdDeclAttr::BS_Inbranch:
6551     Masked.push_back('M');
6552     break;
6553   }
6554   for (auto Mask : Masked) {
6555     for (auto &Data : ISAData) {
6556       SmallString<256> Buffer;
6557       llvm::raw_svector_ostream Out(Buffer);
6558       Out << "_ZGV" << Data.ISA << Mask;
6559       if (!VLENVal) {
6560         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
6561                                          evaluateCDTSize(FD, ParamAttrs));
6562       } else
6563         Out << VLENVal;
6564       for (auto &ParamAttr : ParamAttrs) {
6565         switch (ParamAttr.Kind){
6566         case LinearWithVarStride:
6567           Out << 's' << ParamAttr.StrideOrArg;
6568           break;
6569         case Linear:
6570           Out << 'l';
6571           if (!!ParamAttr.StrideOrArg)
6572             Out << ParamAttr.StrideOrArg;
6573           break;
6574         case Uniform:
6575           Out << 'u';
6576           break;
6577         case Vector:
6578           Out << 'v';
6579           break;
6580         }
6581         if (!!ParamAttr.Alignment)
6582           Out << 'a' << ParamAttr.Alignment;
6583       }
6584       Out << '_' << Fn->getName();
6585       Fn->addFnAttr(Out.str());
6586     }
6587   }
6588 }
6589 
6590 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
6591                                               llvm::Function *Fn) {
6592   ASTContext &C = CGM.getContext();
6593   FD = FD->getCanonicalDecl();
6594   // Map params to their positions in function decl.
6595   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
6596   if (isa<CXXMethodDecl>(FD))
6597     ParamPositions.insert({FD, 0});
6598   unsigned ParamPos = ParamPositions.size();
6599   for (auto *P : FD->parameters()) {
6600     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
6601     ++ParamPos;
6602   }
6603   for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
6604     llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
6605     // Mark uniform parameters.
6606     for (auto *E : Attr->uniforms()) {
6607       E = E->IgnoreParenImpCasts();
6608       unsigned Pos;
6609       if (isa<CXXThisExpr>(E))
6610         Pos = ParamPositions[FD];
6611       else {
6612         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6613                         ->getCanonicalDecl();
6614         Pos = ParamPositions[PVD];
6615       }
6616       ParamAttrs[Pos].Kind = Uniform;
6617     }
6618     // Get alignment info.
6619     auto NI = Attr->alignments_begin();
6620     for (auto *E : Attr->aligneds()) {
6621       E = E->IgnoreParenImpCasts();
6622       unsigned Pos;
6623       QualType ParmTy;
6624       if (isa<CXXThisExpr>(E)) {
6625         Pos = ParamPositions[FD];
6626         ParmTy = E->getType();
6627       } else {
6628         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6629                         ->getCanonicalDecl();
6630         Pos = ParamPositions[PVD];
6631         ParmTy = PVD->getType();
6632       }
6633       ParamAttrs[Pos].Alignment =
6634           (*NI) ? (*NI)->EvaluateKnownConstInt(C)
6635                 : llvm::APSInt::getUnsigned(
6636                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
6637                           .getQuantity());
6638       ++NI;
6639     }
6640     // Mark linear parameters.
6641     auto SI = Attr->steps_begin();
6642     auto MI = Attr->modifiers_begin();
6643     for (auto *E : Attr->linears()) {
6644       E = E->IgnoreParenImpCasts();
6645       unsigned Pos;
6646       if (isa<CXXThisExpr>(E))
6647         Pos = ParamPositions[FD];
6648       else {
6649         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6650                         ->getCanonicalDecl();
6651         Pos = ParamPositions[PVD];
6652       }
6653       auto &ParamAttr = ParamAttrs[Pos];
6654       ParamAttr.Kind = Linear;
6655       if (*SI) {
6656         if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
6657                                   Expr::SE_AllowSideEffects)) {
6658           if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
6659             if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
6660               ParamAttr.Kind = LinearWithVarStride;
6661               ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
6662                   ParamPositions[StridePVD->getCanonicalDecl()]);
6663             }
6664           }
6665         }
6666       }
6667       ++SI;
6668       ++MI;
6669     }
6670     llvm::APSInt VLENVal;
6671     if (const Expr *VLEN = Attr->getSimdlen())
6672       VLENVal = VLEN->EvaluateKnownConstInt(C);
6673     OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
6674     if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
6675         CGM.getTriple().getArch() == llvm::Triple::x86_64)
6676       emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
6677   }
6678 }
6679 
6680 namespace {
6681 /// Cleanup action for doacross support.
6682 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
6683 public:
6684   static const int DoacrossFinArgs = 2;
6685 
6686 private:
6687   llvm::Value *RTLFn;
6688   llvm::Value *Args[DoacrossFinArgs];
6689 
6690 public:
6691   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
6692       : RTLFn(RTLFn) {
6693     assert(CallArgs.size() == DoacrossFinArgs);
6694     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
6695   }
6696   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
6697     if (!CGF.HaveInsertPoint())
6698       return;
6699     CGF.EmitRuntimeCall(RTLFn, Args);
6700   }
6701 };
6702 } // namespace
6703 
6704 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
6705                                        const OMPLoopDirective &D) {
6706   if (!CGF.HaveInsertPoint())
6707     return;
6708 
6709   ASTContext &C = CGM.getContext();
6710   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
6711   RecordDecl *RD;
6712   if (KmpDimTy.isNull()) {
6713     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
6714     //  kmp_int64 lo; // lower
6715     //  kmp_int64 up; // upper
6716     //  kmp_int64 st; // stride
6717     // };
6718     RD = C.buildImplicitRecord("kmp_dim");
6719     RD->startDefinition();
6720     addFieldToRecordDecl(C, RD, Int64Ty);
6721     addFieldToRecordDecl(C, RD, Int64Ty);
6722     addFieldToRecordDecl(C, RD, Int64Ty);
6723     RD->completeDefinition();
6724     KmpDimTy = C.getRecordType(RD);
6725   } else
6726     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
6727 
6728   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
6729   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
6730   enum { LowerFD = 0, UpperFD, StrideFD };
6731   // Fill dims with data.
6732   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
6733   // dims.upper = num_iterations;
6734   LValue UpperLVal =
6735       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
6736   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
6737       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
6738       Int64Ty, D.getNumIterations()->getExprLoc());
6739   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
6740   // dims.stride = 1;
6741   LValue StrideLVal =
6742       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
6743   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
6744                         StrideLVal);
6745 
6746   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
6747   // kmp_int32 num_dims, struct kmp_dim * dims);
6748   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
6749                          getThreadID(CGF, D.getLocStart()),
6750                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
6751                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6752                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
6753 
6754   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
6755   CGF.EmitRuntimeCall(RTLFn, Args);
6756   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
6757       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
6758   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
6759   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
6760                                              llvm::makeArrayRef(FiniArgs));
6761 }
6762 
6763 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
6764                                           const OMPDependClause *C) {
6765   QualType Int64Ty =
6766       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
6767   const Expr *CounterVal = C->getCounterValue();
6768   assert(CounterVal);
6769   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
6770                                                  CounterVal->getType(), Int64Ty,
6771                                                  CounterVal->getExprLoc());
6772   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
6773   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
6774   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
6775                          getThreadID(CGF, C->getLocStart()),
6776                          CntAddr.getPointer()};
6777   llvm::Value *RTLFn;
6778   if (C->getDependencyKind() == OMPC_DEPEND_source)
6779     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
6780   else {
6781     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
6782     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
6783   }
6784   CGF.EmitRuntimeCall(RTLFn, Args);
6785 }
6786 
6787