1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "ConstantBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/IR/CallSite.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// \brief Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// \brief Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// \brief Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// \brief Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// \brief Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// \brief Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// \brief Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// \brief Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// \brief Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// \brief API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel)
103       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
104                            HasCancel),
105         ThreadIDVar(ThreadIDVar) {
106     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
107   }
108 
109   /// \brief Get a variable or parameter for storing global thread id
110   /// inside OpenMP construct.
111   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
112 
113   /// \brief Get the name of the capture helper.
114   StringRef getHelperName() const override { return ".omp_outlined."; }
115 
116   static bool classof(const CGCapturedStmtInfo *Info) {
117     return CGOpenMPRegionInfo::classof(Info) &&
118            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
119                ParallelOutlinedRegion;
120   }
121 
122 private:
123   /// \brief A variable or parameter storing global thread id for OpenMP
124   /// constructs.
125   const VarDecl *ThreadIDVar;
126 };
127 
128 /// \brief API for captured statement code generation in OpenMP constructs.
129 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
130 public:
131   class UntiedTaskActionTy final : public PrePostActionTy {
132     bool Untied;
133     const VarDecl *PartIDVar;
134     const RegionCodeGenTy UntiedCodeGen;
135     llvm::SwitchInst *UntiedSwitch = nullptr;
136 
137   public:
138     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
139                        const RegionCodeGenTy &UntiedCodeGen)
140         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
141     void Enter(CodeGenFunction &CGF) override {
142       if (Untied) {
143         // Emit task switching point.
144         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
145             CGF.GetAddrOfLocalVar(PartIDVar),
146             PartIDVar->getType()->castAs<PointerType>());
147         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
148         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
149         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
150         CGF.EmitBlock(DoneBB);
151         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
152         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
153         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
154                               CGF.Builder.GetInsertBlock());
155         emitUntiedSwitch(CGF);
156       }
157     }
158     void emitUntiedSwitch(CodeGenFunction &CGF) const {
159       if (Untied) {
160         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
161             CGF.GetAddrOfLocalVar(PartIDVar),
162             PartIDVar->getType()->castAs<PointerType>());
163         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
164                               PartIdLVal);
165         UntiedCodeGen(CGF);
166         CodeGenFunction::JumpDest CurPoint =
167             CGF.getJumpDestInCurrentScope(".untied.next.");
168         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
169         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
170         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
171                               CGF.Builder.GetInsertBlock());
172         CGF.EmitBranchThroughCleanup(CurPoint);
173         CGF.EmitBlock(CurPoint.getBlock());
174       }
175     }
176     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
177   };
178   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
179                                  const VarDecl *ThreadIDVar,
180                                  const RegionCodeGenTy &CodeGen,
181                                  OpenMPDirectiveKind Kind, bool HasCancel,
182                                  const UntiedTaskActionTy &Action)
183       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
184         ThreadIDVar(ThreadIDVar), Action(Action) {
185     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
186   }
187 
188   /// \brief Get a variable or parameter for storing global thread id
189   /// inside OpenMP construct.
190   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
191 
192   /// \brief Get an LValue for the current ThreadID variable.
193   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
194 
195   /// \brief Get the name of the capture helper.
196   StringRef getHelperName() const override { return ".omp_outlined."; }
197 
198   void emitUntiedSwitch(CodeGenFunction &CGF) override {
199     Action.emitUntiedSwitch(CGF);
200   }
201 
202   static bool classof(const CGCapturedStmtInfo *Info) {
203     return CGOpenMPRegionInfo::classof(Info) &&
204            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
205                TaskOutlinedRegion;
206   }
207 
208 private:
209   /// \brief A variable or parameter storing global thread id for OpenMP
210   /// constructs.
211   const VarDecl *ThreadIDVar;
212   /// Action for emitting code for untied tasks.
213   const UntiedTaskActionTy &Action;
214 };
215 
216 /// \brief API for inlined captured statement code generation in OpenMP
217 /// constructs.
218 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
219 public:
220   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
221                             const RegionCodeGenTy &CodeGen,
222                             OpenMPDirectiveKind Kind, bool HasCancel)
223       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
224         OldCSI(OldCSI),
225         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
226 
227   // \brief Retrieve the value of the context parameter.
228   llvm::Value *getContextValue() const override {
229     if (OuterRegionInfo)
230       return OuterRegionInfo->getContextValue();
231     llvm_unreachable("No context value for inlined OpenMP region");
232   }
233 
234   void setContextValue(llvm::Value *V) override {
235     if (OuterRegionInfo) {
236       OuterRegionInfo->setContextValue(V);
237       return;
238     }
239     llvm_unreachable("No context value for inlined OpenMP region");
240   }
241 
242   /// \brief Lookup the captured field decl for a variable.
243   const FieldDecl *lookup(const VarDecl *VD) const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->lookup(VD);
246     // If there is no outer outlined region,no need to lookup in a list of
247     // captured variables, we can use the original one.
248     return nullptr;
249   }
250 
251   FieldDecl *getThisFieldDecl() const override {
252     if (OuterRegionInfo)
253       return OuterRegionInfo->getThisFieldDecl();
254     return nullptr;
255   }
256 
257   /// \brief Get a variable or parameter for storing global thread id
258   /// inside OpenMP construct.
259   const VarDecl *getThreadIDVariable() const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->getThreadIDVariable();
262     return nullptr;
263   }
264 
265   /// \brief Get the name of the capture helper.
266   StringRef getHelperName() const override {
267     if (auto *OuterRegionInfo = getOldCSI())
268       return OuterRegionInfo->getHelperName();
269     llvm_unreachable("No helper name for inlined OpenMP construct");
270   }
271 
272   void emitUntiedSwitch(CodeGenFunction &CGF) override {
273     if (OuterRegionInfo)
274       OuterRegionInfo->emitUntiedSwitch(CGF);
275   }
276 
277   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
278 
279   static bool classof(const CGCapturedStmtInfo *Info) {
280     return CGOpenMPRegionInfo::classof(Info) &&
281            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
282   }
283 
284   ~CGOpenMPInlinedRegionInfo() override = default;
285 
286 private:
287   /// \brief CodeGen info about outer OpenMP region.
288   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
289   CGOpenMPRegionInfo *OuterRegionInfo;
290 };
291 
292 /// \brief API for captured statement code generation in OpenMP target
293 /// constructs. For this captures, implicit parameters are used instead of the
294 /// captured fields. The name of the target region has to be unique in a given
295 /// application so it is provided by the client, because only the client has
296 /// the information to generate that.
297 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
298 public:
299   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
300                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
301       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
302                            /*HasCancel=*/false),
303         HelperName(HelperName) {}
304 
305   /// \brief This is unused for target regions because each starts executing
306   /// with a single thread.
307   const VarDecl *getThreadIDVariable() const override { return nullptr; }
308 
309   /// \brief Get the name of the capture helper.
310   StringRef getHelperName() const override { return HelperName; }
311 
312   static bool classof(const CGCapturedStmtInfo *Info) {
313     return CGOpenMPRegionInfo::classof(Info) &&
314            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
315   }
316 
317 private:
318   StringRef HelperName;
319 };
320 
321 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
322   llvm_unreachable("No codegen for expressions");
323 }
324 /// \brief API for generation of expressions captured in a innermost OpenMP
325 /// region.
326 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
327 public:
328   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
329       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
330                                   OMPD_unknown,
331                                   /*HasCancel=*/false),
332         PrivScope(CGF) {
333     // Make sure the globals captured in the provided statement are local by
334     // using the privatization logic. We assume the same variable is not
335     // captured more than once.
336     for (auto &C : CS.captures()) {
337       if (!C.capturesVariable() && !C.capturesVariableByCopy())
338         continue;
339 
340       const VarDecl *VD = C.getCapturedVar();
341       if (VD->isLocalVarDeclOrParm())
342         continue;
343 
344       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
345                       /*RefersToEnclosingVariableOrCapture=*/false,
346                       VD->getType().getNonReferenceType(), VK_LValue,
347                       SourceLocation());
348       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
349         return CGF.EmitLValue(&DRE).getAddress();
350       });
351     }
352     (void)PrivScope.Privatize();
353   }
354 
355   /// \brief Lookup the captured field decl for a variable.
356   const FieldDecl *lookup(const VarDecl *VD) const override {
357     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
358       return FD;
359     return nullptr;
360   }
361 
362   /// \brief Emit the captured statement body.
363   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
364     llvm_unreachable("No body for expressions");
365   }
366 
367   /// \brief Get a variable or parameter for storing global thread id
368   /// inside OpenMP construct.
369   const VarDecl *getThreadIDVariable() const override {
370     llvm_unreachable("No thread id for expressions");
371   }
372 
373   /// \brief Get the name of the capture helper.
374   StringRef getHelperName() const override {
375     llvm_unreachable("No helper name for expressions");
376   }
377 
378   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
379 
380 private:
381   /// Private scope to capture global variables.
382   CodeGenFunction::OMPPrivateScope PrivScope;
383 };
384 
385 /// \brief RAII for emitting code of OpenMP constructs.
386 class InlinedOpenMPRegionRAII {
387   CodeGenFunction &CGF;
388   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
389   FieldDecl *LambdaThisCaptureField = nullptr;
390 
391 public:
392   /// \brief Constructs region for combined constructs.
393   /// \param CodeGen Code generation sequence for combined directives. Includes
394   /// a list of functions used for code generation of implicitly inlined
395   /// regions.
396   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
397                           OpenMPDirectiveKind Kind, bool HasCancel)
398       : CGF(CGF) {
399     // Start emission for the construct.
400     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
401         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
402     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
403     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
404     CGF.LambdaThisCaptureField = nullptr;
405   }
406 
407   ~InlinedOpenMPRegionRAII() {
408     // Restore original CapturedStmtInfo only if we're done with code emission.
409     auto *OldCSI =
410         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
411     delete CGF.CapturedStmtInfo;
412     CGF.CapturedStmtInfo = OldCSI;
413     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
414     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
415   }
416 };
417 
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x0001,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x0002,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x0010,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x0020,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x0040,
  /// \brief Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = OMP_IDENT_BARRIER_IMPL,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140
};
439 
/// \brief Indices of the fields of the ident structure, which describes a
/// source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// \brief flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union
  /// member.
  IdentField_Flags = 1,
  /// \brief reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// \brief reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// \brief psource: string describing the source location. The string is
  /// composed of semi-colon separated fields which describe the source file,
  /// the function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
480 
/// \brief Schedule types for 'omp for' loops. The enumerators are taken from
/// the enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// \brief The default schedule is an alias for OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.
  /// \brief Chunked static dist_schedule.
  OMP_dist_sch_static_chunked = 91,
  /// \brief Static dist_schedule.
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// \brief Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// \brief Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
512 
/// \brief Identifiers of the runtime library entry points referenced by
/// OpenMP codegen. Each enumerator is named after the runtime function it
/// stands for; the comment gives that function's prototype.
enum OpenMPRTLFunction {
  /// \brief Call to
  /// void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  ///                       kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to
  /// void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
  ///                                   void *data, size_t size,
  ///                                   void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to
  /// void __kmpc_threadprivate_register(ident_t *, void *data,
  ///                                    kmpc_ctor ctor, kmpc_cctor cctor,
  ///                                    kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// \brief Call to
  /// kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// \brief Call to
  /// void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  ///                      kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// \brief Call to
  /// void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
  ///                                kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// \brief Call to
  /// void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  ///                          kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// \brief Call to
  /// kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// \brief Call to
  /// void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// \brief Call to
  /// void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// \brief Call to
  /// void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// \brief Call to
  /// void __kmpc_end_serialized_parallel(ident_t *loc,
  ///                                     kmp_int32 global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// \brief Call to
  /// void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  ///                              kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// \brief Call to
  /// void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// \brief Call to
  /// kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// \brief Call to
  /// void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// \brief Call to
  /// kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  ///                                int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// \brief Call to
  /// kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// \brief Call to
  /// void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// \brief Call to
  /// kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  ///                                   kmp_int32 flags,
  ///                                   size_t sizeof_kmp_task_t,
  ///                                   size_t sizeof_shareds,
  ///                                   kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// \brief Call to
  /// kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
  ///                           kmp_task_t *new_task);
  OMPRTL__kmpc_omp_task,
  /// \brief Call to
  /// void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  ///                         size_t cpy_size, void *cpy_data,
  ///                         void (*cpy_func)(void *, void *),
  ///                         kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// \brief Call to
  /// kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  ///                         kmp_int32 num_vars, size_t reduce_size,
  ///                         void *reduce_data,
  ///                         void (*reduce_func)(void *lhs_data,
  ///                                             void *rhs_data),
  ///                         kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// \brief Call to
  /// kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  ///                                kmp_int32 num_vars, size_t reduce_size,
  ///                                void *reduce_data,
  ///                                void (*reduce_func)(void *lhs_data,
  ///                                                    void *rhs_data),
  ///                                kmp_critical_name *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// \brief Call to
  /// void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  ///                        kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// \brief Call to
  /// void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  ///                               kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// \brief Call to
  /// void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  ///                                kmp_task_t *new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// \brief Call to
  /// void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  ///                                   kmp_task_t *new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// \brief Call to
  /// void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// \brief Call to
  /// void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// \brief Call to
  /// kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// \brief Call to
  /// void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// \brief Call to
  /// void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// \brief Call to
  /// void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  ///                            int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// \brief Call to
  /// kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
  ///                                     kmp_task_t *new_task,
  ///                                     kmp_int32 ndeps,
  ///                                     kmp_depend_info_t *dep_list,
  ///                                     kmp_int32 ndeps_noalias,
  ///                                     kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// \brief Call to
  /// void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
  ///                           kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  ///                           kmp_int32 ndeps_noalias,
  ///                           kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// \brief Call to
  /// kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 global_tid,
  ///                                    kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// \brief Call to
  /// kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  ///                         kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// \brief Call to
  /// void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  ///                            kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// \brief Call to
  /// void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
  ///                        kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// \brief Call to
  /// void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task,
  ///                      int if_val, kmp_uint64 *lb, kmp_uint64 *ub,
  ///                      kmp_int64 st, int nogroup, int sched,
  ///                      kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// \brief Call to
  /// void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  ///                           kmp_int32 num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// \brief Call to
  /// void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// \brief Call to
  /// void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
  ///                           kmp_int64 *vec);
  OMPRTL__kmpc_doacross_post,
  /// \brief Call to
  /// void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
  ///                           kmp_int64 *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  /// \brief Call to
  /// int32_t __tgt_target(int32_t device_id, void *host_ptr,
  ///                      int32_t arg_num, void **args_base, void **args,
  ///                      size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target,
  /// \brief Call to
  /// int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  ///                            int32_t arg_num, void **args_base,
  ///                            void **args, size_t *arg_sizes,
  ///                            int32_t *arg_types, int32_t num_teams,
  ///                            int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// \brief Call to
  /// void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// \brief Call to
  /// void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// \brief Call to
  /// void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  ///                              void **args_base, void **args,
  ///                              size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// \brief Call to
  /// void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  ///                            void **args_base, void **args,
  ///                            size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// \brief Call to
  /// void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  ///                               void **args_base, void **args,
  ///                               size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
670 
671 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
672 /// region.
673 class CleanupTy final : public EHScopeStack::Cleanup {
674   PrePostActionTy *Action;
675 
676 public:
677   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
678   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
679     if (!CGF.HaveInsertPoint())
680       return;
681     Action->Exit(CGF);
682   }
683 };
684 
685 } // anonymous namespace
686 
687 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
688   CodeGenFunction::RunCleanupsScope Scope(CGF);
689   if (PrePostAction) {
690     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
691     Callback(CodeGen, CGF, *PrePostAction);
692   } else {
693     PrePostActionTy Action;
694     Callback(CodeGen, CGF, Action);
695   }
696 }
697 
698 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
699   return CGF.EmitLoadOfPointerLValue(
700       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
701       getThreadIDVariable()->getType()->castAs<PointerType>());
702 }
703 
704 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
705   if (!CGF.HaveInsertPoint())
706     return;
707   // 1.2.2 OpenMP Language Terminology
708   // Structured block - An executable statement with a single entry at the
709   // top and a single exit at the bottom.
710   // The point of exit cannot be a branch out of the structured block.
711   // longjmp() and throw() must not violate the entry/exit criteria.
712   CGF.EHStack.pushTerminate();
713   CodeGen(CGF);
714   CGF.EHStack.popTerminate();
715 }
716 
717 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
718     CodeGenFunction &CGF) {
719   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
720                             getThreadIDVariable()->getType(),
721                             AlignmentSource::Decl);
722 }
723 
724 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
725     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
726   IdentTy = llvm::StructType::create(
727       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
728       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
729       CGM.Int8PtrTy /* psource */, nullptr);
730   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
731 
732   loadOffloadInfoMetadata();
733 }
734 
735 void CGOpenMPRuntime::clear() {
736   InternalVars.clear();
737 }
738 
739 static llvm::Function *
740 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
741                           const Expr *CombinerInitializer, const VarDecl *In,
742                           const VarDecl *Out, bool IsCombiner) {
743   // void .omp_combiner.(Ty *in, Ty *out);
744   auto &C = CGM.getContext();
745   QualType PtrTy = C.getPointerType(Ty).withRestrict();
746   FunctionArgList Args;
747   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
748                                /*Id=*/nullptr, PtrTy);
749   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
750                               /*Id=*/nullptr, PtrTy);
751   Args.push_back(&OmpOutParm);
752   Args.push_back(&OmpInParm);
753   auto &FnInfo =
754       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
755   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
756   auto *Fn = llvm::Function::Create(
757       FnTy, llvm::GlobalValue::InternalLinkage,
758       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
759   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
760   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
761   CodeGenFunction CGF(CGM);
762   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
763   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
764   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
765   CodeGenFunction::OMPPrivateScope Scope(CGF);
766   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
767   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
768     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
769         .getAddress();
770   });
771   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
772   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
773     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
774         .getAddress();
775   });
776   (void)Scope.Privatize();
777   CGF.EmitIgnoredExpr(CombinerInitializer);
778   Scope.ForceCleanup();
779   CGF.FinishFunction();
780   return Fn;
781 }
782 
783 void CGOpenMPRuntime::emitUserDefinedReduction(
784     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
785   if (UDRMap.count(D) > 0)
786     return;
787   auto &C = CGM.getContext();
788   if (!In || !Out) {
789     In = &C.Idents.get("omp_in");
790     Out = &C.Idents.get("omp_out");
791   }
792   llvm::Function *Combiner = emitCombinerOrInitializer(
793       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
794       cast<VarDecl>(D->lookup(Out).front()),
795       /*IsCombiner=*/true);
796   llvm::Function *Initializer = nullptr;
797   if (auto *Init = D->getInitializer()) {
798     if (!Priv || !Orig) {
799       Priv = &C.Idents.get("omp_priv");
800       Orig = &C.Idents.get("omp_orig");
801     }
802     Initializer = emitCombinerOrInitializer(
803         CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
804         cast<VarDecl>(D->lookup(Priv).front()),
805         /*IsCombiner=*/false);
806   }
807   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
808   if (CGF) {
809     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
810     Decls.second.push_back(D);
811   }
812 }
813 
814 std::pair<llvm::Function *, llvm::Function *>
815 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
816   auto I = UDRMap.find(D);
817   if (I != UDRMap.end())
818     return I->second;
819   emitUserDefinedReduction(/*CGF=*/nullptr, D);
820   return UDRMap.lookup(D);
821 }
822 
823 // Layout information for ident_t.
824 static CharUnits getIdentAlign(CodeGenModule &CGM) {
825   return CGM.getPointerAlign();
826 }
827 static CharUnits getIdentSize(CodeGenModule &CGM) {
828   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
829   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
830 }
831 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
832   // All the fields except the last are i32, so this works beautifully.
833   return unsigned(Field) * CharUnits::fromQuantity(4);
834 }
835 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
836                                    IdentFieldIndex Field,
837                                    const llvm::Twine &Name = "") {
838   auto Offset = getOffsetOfIdentField(Field);
839   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
840 }
841 
842 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
843     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
844     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
845   assert(ThreadIDVar->getType()->isPointerType() &&
846          "thread id variable must be of type kmp_int32 *");
847   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
848   CodeGenFunction CGF(CGM, true);
849   bool HasCancel = false;
850   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
851     HasCancel = OPD->hasCancel();
852   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
853     HasCancel = OPSD->hasCancel();
854   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
855     HasCancel = OPFD->hasCancel();
856   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
857                                     HasCancel);
858   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
859   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
860 }
861 
862 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
863     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
864     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
865     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
866     bool Tied, unsigned &NumberOfParts) {
867   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
868                                               PrePostActionTy &) {
869     auto *ThreadID = getThreadID(CGF, D.getLocStart());
870     auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
871     llvm::Value *TaskArgs[] = {
872         UpLoc, ThreadID,
873         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
874                                     TaskTVar->getType()->castAs<PointerType>())
875             .getPointer()};
876     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
877   };
878   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
879                                                             UntiedCodeGen);
880   CodeGen.setAction(Action);
881   assert(!ThreadIDVar->getType()->isPointerType() &&
882          "thread id variable must be of type kmp_int32 for tasks");
883   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
884   auto *TD = dyn_cast<OMPTaskDirective>(&D);
885   CodeGenFunction CGF(CGM, true);
886   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
887                                         InnermostKind,
888                                         TD ? TD->hasCancel() : false, Action);
889   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
890   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
891   if (!Tied)
892     NumberOfParts = Action.getNumberOfParts();
893   return Res;
894 }
895 
896 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
897   CharUnits Align = getIdentAlign(CGM);
898   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
899   if (!Entry) {
900     if (!DefaultOpenMPPSource) {
901       // Initialize default location for psource field of ident_t structure of
902       // all ident_t objects. Format is ";file;function;line;column;;".
903       // Taken from
904       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
905       DefaultOpenMPPSource =
906           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
907       DefaultOpenMPPSource =
908           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
909     }
910 
911     ConstantInitBuilder builder(CGM);
912     auto fields = builder.beginStruct(IdentTy);
913     fields.addInt(CGM.Int32Ty, 0);
914     fields.addInt(CGM.Int32Ty, Flags);
915     fields.addInt(CGM.Int32Ty, 0);
916     fields.addInt(CGM.Int32Ty, 0);
917     fields.add(DefaultOpenMPPSource);
918     auto DefaultOpenMPLocation =
919       fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
920                                    llvm::GlobalValue::PrivateLinkage);
921     DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
922 
923     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
924   }
925   return Address(Entry, Align);
926 }
927 
928 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
929                                                  SourceLocation Loc,
930                                                  unsigned Flags) {
931   Flags |= OMP_IDENT_KMPC;
932   // If no debug info is generated - return global default location.
933   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
934       Loc.isInvalid())
935     return getOrCreateDefaultLocation(Flags).getPointer();
936 
937   assert(CGF.CurFn && "No function in current CodeGenFunction.");
938 
939   Address LocValue = Address::invalid();
940   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
941   if (I != OpenMPLocThreadIDMap.end())
942     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
943 
944   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
945   // GetOpenMPThreadID was called before this routine.
946   if (!LocValue.isValid()) {
947     // Generate "ident_t .kmpc_loc.addr;"
948     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
949                                       ".kmpc_loc.addr");
950     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
951     Elem.second.DebugLoc = AI.getPointer();
952     LocValue = AI;
953 
954     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
955     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
956     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
957                              CGM.getSize(getIdentSize(CGF.CGM)));
958   }
959 
960   // char **psource = &.kmpc_loc_<flags>.addr.psource;
961   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
962 
963   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
964   if (OMPDebugLoc == nullptr) {
965     SmallString<128> Buffer2;
966     llvm::raw_svector_ostream OS2(Buffer2);
967     // Build debug location
968     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
969     OS2 << ";" << PLoc.getFilename() << ";";
970     if (const FunctionDecl *FD =
971             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
972       OS2 << FD->getQualifiedNameAsString();
973     }
974     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
975     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
976     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
977   }
978   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
979   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
980 
981   // Our callers always pass this to a runtime function, so for
982   // convenience, go ahead and return a naked pointer.
983   return LocValue.getPointer();
984 }
985 
986 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
987                                           SourceLocation Loc) {
988   assert(CGF.CurFn && "No function in current CodeGenFunction.");
989 
990   llvm::Value *ThreadID = nullptr;
991   // Check whether we've already cached a load of the thread id in this
992   // function.
993   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
994   if (I != OpenMPLocThreadIDMap.end()) {
995     ThreadID = I->second.ThreadID;
996     if (ThreadID != nullptr)
997       return ThreadID;
998   }
999   if (auto *OMPRegionInfo =
1000           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1001     if (OMPRegionInfo->getThreadIDVariable()) {
1002       // Check if this an outlined function with thread id passed as argument.
1003       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1004       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1005       // If value loaded in entry block, cache it and use it everywhere in
1006       // function.
1007       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1008         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1009         Elem.second.ThreadID = ThreadID;
1010       }
1011       return ThreadID;
1012     }
1013   }
1014 
1015   // This is not an outlined function region - need to call __kmpc_int32
1016   // kmpc_global_thread_num(ident_t *loc).
1017   // Generate thread id value and cache this value for use across the
1018   // function.
1019   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1020   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1021   ThreadID =
1022       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1023                           emitUpdateLocation(CGF, Loc));
1024   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1025   Elem.second.ThreadID = ThreadID;
1026   return ThreadID;
1027 }
1028 
1029 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1030   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1031   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1032     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1033   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1034     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1035       UDRMap.erase(D);
1036     }
1037     FunctionUDRMap.erase(CGF.CurFn);
1038   }
1039 }
1040 
1041 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1042   if (!IdentTy) {
1043   }
1044   return llvm::PointerType::getUnqual(IdentTy);
1045 }
1046 
1047 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1048   if (!Kmpc_MicroTy) {
1049     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1050     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1051                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1052     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1053   }
1054   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1055 }
1056 
1057 llvm::Constant *
1058 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1059   llvm::Constant *RTLFn = nullptr;
1060   switch (static_cast<OpenMPRTLFunction>(Function)) {
1061   case OMPRTL__kmpc_fork_call: {
1062     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1063     // microtask, ...);
1064     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1065                                 getKmpc_MicroPointerTy()};
1066     llvm::FunctionType *FnTy =
1067         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1068     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1069     break;
1070   }
1071   case OMPRTL__kmpc_global_thread_num: {
1072     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1073     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1074     llvm::FunctionType *FnTy =
1075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1076     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1077     break;
1078   }
1079   case OMPRTL__kmpc_threadprivate_cached: {
1080     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1081     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1082     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1083                                 CGM.VoidPtrTy, CGM.SizeTy,
1084                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1085     llvm::FunctionType *FnTy =
1086         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1087     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1088     break;
1089   }
1090   case OMPRTL__kmpc_critical: {
1091     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1092     // kmp_critical_name *crit);
1093     llvm::Type *TypeParams[] = {
1094         getIdentTyPointerTy(), CGM.Int32Ty,
1095         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1096     llvm::FunctionType *FnTy =
1097         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1098     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1099     break;
1100   }
1101   case OMPRTL__kmpc_critical_with_hint: {
1102     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1103     // kmp_critical_name *crit, uintptr_t hint);
1104     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1105                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1106                                 CGM.IntPtrTy};
1107     llvm::FunctionType *FnTy =
1108         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1109     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1110     break;
1111   }
1112   case OMPRTL__kmpc_threadprivate_register: {
1113     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1114     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1115     // typedef void *(*kmpc_ctor)(void *);
1116     auto KmpcCtorTy =
1117         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1118                                 /*isVarArg*/ false)->getPointerTo();
1119     // typedef void *(*kmpc_cctor)(void *, void *);
1120     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1121     auto KmpcCopyCtorTy =
1122         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1123                                 /*isVarArg*/ false)->getPointerTo();
1124     // typedef void (*kmpc_dtor)(void *);
1125     auto KmpcDtorTy =
1126         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1127             ->getPointerTo();
1128     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1129                               KmpcCopyCtorTy, KmpcDtorTy};
1130     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1131                                         /*isVarArg*/ false);
1132     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1133     break;
1134   }
1135   case OMPRTL__kmpc_end_critical: {
1136     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1137     // kmp_critical_name *crit);
1138     llvm::Type *TypeParams[] = {
1139         getIdentTyPointerTy(), CGM.Int32Ty,
1140         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1141     llvm::FunctionType *FnTy =
1142         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1143     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1144     break;
1145   }
1146   case OMPRTL__kmpc_cancel_barrier: {
1147     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1148     // global_tid);
1149     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1150     llvm::FunctionType *FnTy =
1151         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1152     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1153     break;
1154   }
1155   case OMPRTL__kmpc_barrier: {
1156     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1157     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1158     llvm::FunctionType *FnTy =
1159         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1160     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1161     break;
1162   }
1163   case OMPRTL__kmpc_for_static_fini: {
1164     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1165     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1166     llvm::FunctionType *FnTy =
1167         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1168     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1169     break;
1170   }
1171   case OMPRTL__kmpc_push_num_threads: {
1172     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1173     // kmp_int32 num_threads)
1174     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1175                                 CGM.Int32Ty};
1176     llvm::FunctionType *FnTy =
1177         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1178     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1179     break;
1180   }
1181   case OMPRTL__kmpc_serialized_parallel: {
1182     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1183     // global_tid);
1184     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1185     llvm::FunctionType *FnTy =
1186         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1187     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1188     break;
1189   }
1190   case OMPRTL__kmpc_end_serialized_parallel: {
1191     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1192     // global_tid);
1193     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1194     llvm::FunctionType *FnTy =
1195         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1196     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1197     break;
1198   }
1199   case OMPRTL__kmpc_flush: {
1200     // Build void __kmpc_flush(ident_t *loc);
1201     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1202     llvm::FunctionType *FnTy =
1203         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1204     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1205     break;
1206   }
1207   case OMPRTL__kmpc_master: {
1208     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1209     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1210     llvm::FunctionType *FnTy =
1211         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1212     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1213     break;
1214   }
1215   case OMPRTL__kmpc_end_master: {
1216     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1217     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1218     llvm::FunctionType *FnTy =
1219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1220     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1221     break;
1222   }
1223   case OMPRTL__kmpc_omp_taskyield: {
1224     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1225     // int end_part);
1226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1227     llvm::FunctionType *FnTy =
1228         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1229     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1230     break;
1231   }
1232   case OMPRTL__kmpc_single: {
1233     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1234     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1235     llvm::FunctionType *FnTy =
1236         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1237     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1238     break;
1239   }
1240   case OMPRTL__kmpc_end_single: {
1241     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1242     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1243     llvm::FunctionType *FnTy =
1244         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1245     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1246     break;
1247   }
1248   case OMPRTL__kmpc_omp_task_alloc: {
1249     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1250     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1251     // kmp_routine_entry_t *task_entry);
1252     assert(KmpRoutineEntryPtrTy != nullptr &&
1253            "Type kmp_routine_entry_t must be created.");
1254     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1255                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1256     // Return void * and then cast to particular kmp_task_t type.
1257     llvm::FunctionType *FnTy =
1258         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1259     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1260     break;
1261   }
1262   case OMPRTL__kmpc_omp_task: {
1263     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1264     // *new_task);
1265     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1266                                 CGM.VoidPtrTy};
1267     llvm::FunctionType *FnTy =
1268         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1269     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1270     break;
1271   }
1272   case OMPRTL__kmpc_copyprivate: {
1273     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1274     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1275     // kmp_int32 didit);
1276     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1277     auto *CpyFnTy =
1278         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1279     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1280                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1281                                 CGM.Int32Ty};
1282     llvm::FunctionType *FnTy =
1283         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1284     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1285     break;
1286   }
1287   case OMPRTL__kmpc_reduce: {
1288     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1289     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1290     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1291     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1292     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1293                                                /*isVarArg=*/false);
1294     llvm::Type *TypeParams[] = {
1295         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1296         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1297         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1298     llvm::FunctionType *FnTy =
1299         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1300     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1301     break;
1302   }
1303   case OMPRTL__kmpc_reduce_nowait: {
1304     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1305     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1306     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1307     // *lck);
1308     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1309     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1310                                                /*isVarArg=*/false);
1311     llvm::Type *TypeParams[] = {
1312         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1313         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1314         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1315     llvm::FunctionType *FnTy =
1316         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1317     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1318     break;
1319   }
1320   case OMPRTL__kmpc_end_reduce: {
1321     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1322     // kmp_critical_name *lck);
1323     llvm::Type *TypeParams[] = {
1324         getIdentTyPointerTy(), CGM.Int32Ty,
1325         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1326     llvm::FunctionType *FnTy =
1327         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1328     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1329     break;
1330   }
1331   case OMPRTL__kmpc_end_reduce_nowait: {
1332     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1333     // kmp_critical_name *lck);
1334     llvm::Type *TypeParams[] = {
1335         getIdentTyPointerTy(), CGM.Int32Ty,
1336         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1337     llvm::FunctionType *FnTy =
1338         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1339     RTLFn =
1340         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1341     break;
1342   }
1343   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1346     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1347                                 CGM.VoidPtrTy};
1348     llvm::FunctionType *FnTy =
1349         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1350     RTLFn =
1351         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1352     break;
1353   }
1354   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1357     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1358                                 CGM.VoidPtrTy};
1359     llvm::FunctionType *FnTy =
1360         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1361     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1362                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1363     break;
1364   }
1365   case OMPRTL__kmpc_ordered: {
1366     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1367     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1368     llvm::FunctionType *FnTy =
1369         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1370     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1371     break;
1372   }
1373   case OMPRTL__kmpc_end_ordered: {
1374     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1375     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1376     llvm::FunctionType *FnTy =
1377         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1378     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1379     break;
1380   }
1381   case OMPRTL__kmpc_omp_taskwait: {
1382     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1383     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1384     llvm::FunctionType *FnTy =
1385         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1386     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1387     break;
1388   }
1389   case OMPRTL__kmpc_taskgroup: {
1390     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1391     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1392     llvm::FunctionType *FnTy =
1393         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1394     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1395     break;
1396   }
1397   case OMPRTL__kmpc_end_taskgroup: {
1398     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1399     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1400     llvm::FunctionType *FnTy =
1401         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1402     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1403     break;
1404   }
1405   case OMPRTL__kmpc_push_proc_bind: {
1406     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1407     // int proc_bind)
1408     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1409     llvm::FunctionType *FnTy =
1410         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1411     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1412     break;
1413   }
1414   case OMPRTL__kmpc_omp_task_with_deps: {
1415     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1416     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1417     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1418     llvm::Type *TypeParams[] = {
1419         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1420         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1421     llvm::FunctionType *FnTy =
1422         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1423     RTLFn =
1424         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1425     break;
1426   }
1427   case OMPRTL__kmpc_omp_wait_deps: {
1428     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1429     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1430     // kmp_depend_info_t *noalias_dep_list);
1431     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1432                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1433                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1434     llvm::FunctionType *FnTy =
1435         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1436     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1437     break;
1438   }
1439   case OMPRTL__kmpc_cancellationpoint: {
1440     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1441     // global_tid, kmp_int32 cncl_kind)
1442     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1443     llvm::FunctionType *FnTy =
1444         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1445     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1446     break;
1447   }
1448   case OMPRTL__kmpc_cancel: {
1449     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1450     // kmp_int32 cncl_kind)
1451     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1452     llvm::FunctionType *FnTy =
1453         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1454     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1455     break;
1456   }
1457   case OMPRTL__kmpc_push_num_teams: {
1458     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
1459     // kmp_int32 num_teams, kmp_int32 num_threads)
1460     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1461         CGM.Int32Ty};
1462     llvm::FunctionType *FnTy =
1463         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1464     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1465     break;
1466   }
1467   case OMPRTL__kmpc_fork_teams: {
1468     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1469     // microtask, ...);
1470     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1471                                 getKmpc_MicroPointerTy()};
1472     llvm::FunctionType *FnTy =
1473         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1474     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1475     break;
1476   }
1477   case OMPRTL__kmpc_taskloop: {
1478     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1479     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1480     // sched, kmp_uint64 grainsize, void *task_dup);
1481     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1482                                 CGM.IntTy,
1483                                 CGM.VoidPtrTy,
1484                                 CGM.IntTy,
1485                                 CGM.Int64Ty->getPointerTo(),
1486                                 CGM.Int64Ty->getPointerTo(),
1487                                 CGM.Int64Ty,
1488                                 CGM.IntTy,
1489                                 CGM.IntTy,
1490                                 CGM.Int64Ty,
1491                                 CGM.VoidPtrTy};
1492     llvm::FunctionType *FnTy =
1493         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1494     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1495     break;
1496   }
1497   case OMPRTL__kmpc_doacross_init: {
1498     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1499     // num_dims, struct kmp_dim *dims);
1500     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1501                                 CGM.Int32Ty,
1502                                 CGM.Int32Ty,
1503                                 CGM.VoidPtrTy};
1504     llvm::FunctionType *FnTy =
1505         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1506     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1507     break;
1508   }
1509   case OMPRTL__kmpc_doacross_fini: {
1510     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1511     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1512     llvm::FunctionType *FnTy =
1513         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1514     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1515     break;
1516   }
1517   case OMPRTL__kmpc_doacross_post: {
1518     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1519     // *vec);
1520     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1521                                 CGM.Int64Ty->getPointerTo()};
1522     llvm::FunctionType *FnTy =
1523         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1524     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1525     break;
1526   }
1527   case OMPRTL__kmpc_doacross_wait: {
1528     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1529     // *vec);
1530     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1531                                 CGM.Int64Ty->getPointerTo()};
1532     llvm::FunctionType *FnTy =
1533         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1534     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1535     break;
1536   }
1537   case OMPRTL__tgt_target: {
1538     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1539     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1540     // *arg_types);
1541     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1542                                 CGM.VoidPtrTy,
1543                                 CGM.Int32Ty,
1544                                 CGM.VoidPtrPtrTy,
1545                                 CGM.VoidPtrPtrTy,
1546                                 CGM.SizeTy->getPointerTo(),
1547                                 CGM.Int32Ty->getPointerTo()};
1548     llvm::FunctionType *FnTy =
1549         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1550     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1551     break;
1552   }
1553   case OMPRTL__tgt_target_teams: {
1554     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1555     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1556     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1557     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1558                                 CGM.VoidPtrTy,
1559                                 CGM.Int32Ty,
1560                                 CGM.VoidPtrPtrTy,
1561                                 CGM.VoidPtrPtrTy,
1562                                 CGM.SizeTy->getPointerTo(),
1563                                 CGM.Int32Ty->getPointerTo(),
1564                                 CGM.Int32Ty,
1565                                 CGM.Int32Ty};
1566     llvm::FunctionType *FnTy =
1567         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1568     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1569     break;
1570   }
1571   case OMPRTL__tgt_register_lib: {
1572     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1573     QualType ParamTy =
1574         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1575     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1576     llvm::FunctionType *FnTy =
1577         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1578     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1579     break;
1580   }
1581   case OMPRTL__tgt_unregister_lib: {
1582     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1583     QualType ParamTy =
1584         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1585     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1586     llvm::FunctionType *FnTy =
1587         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1588     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1589     break;
1590   }
1591   case OMPRTL__tgt_target_data_begin: {
1592     // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
1593     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1594     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1595                                 CGM.Int32Ty,
1596                                 CGM.VoidPtrPtrTy,
1597                                 CGM.VoidPtrPtrTy,
1598                                 CGM.SizeTy->getPointerTo(),
1599                                 CGM.Int32Ty->getPointerTo()};
1600     llvm::FunctionType *FnTy =
1601         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1602     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
1603     break;
1604   }
1605   case OMPRTL__tgt_target_data_end: {
1606     // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
1607     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1608     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1609                                 CGM.Int32Ty,
1610                                 CGM.VoidPtrPtrTy,
1611                                 CGM.VoidPtrPtrTy,
1612                                 CGM.SizeTy->getPointerTo(),
1613                                 CGM.Int32Ty->getPointerTo()};
1614     llvm::FunctionType *FnTy =
1615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
1617     break;
1618   }
1619   case OMPRTL__tgt_target_data_update: {
1620     // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
1621     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1622     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1623                                 CGM.Int32Ty,
1624                                 CGM.VoidPtrPtrTy,
1625                                 CGM.VoidPtrPtrTy,
1626                                 CGM.SizeTy->getPointerTo(),
1627                                 CGM.Int32Ty->getPointerTo()};
1628     llvm::FunctionType *FnTy =
1629         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1630     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
1631     break;
1632   }
1633   }
1634   assert(RTLFn && "Unable to find OpenMP runtime function");
1635   return RTLFn;
1636 }
1637 
1638 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1639                                                              bool IVSigned) {
1640   assert((IVSize == 32 || IVSize == 64) &&
1641          "IV size is not compatible with the omp runtime");
1642   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1643                                        : "__kmpc_for_static_init_4u")
1644                            : (IVSigned ? "__kmpc_for_static_init_8"
1645                                        : "__kmpc_for_static_init_8u");
1646   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1647   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1648   llvm::Type *TypeParams[] = {
1649     getIdentTyPointerTy(),                     // loc
1650     CGM.Int32Ty,                               // tid
1651     CGM.Int32Ty,                               // schedtype
1652     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1653     PtrTy,                                     // p_lower
1654     PtrTy,                                     // p_upper
1655     PtrTy,                                     // p_stride
1656     ITy,                                       // incr
1657     ITy                                        // chunk
1658   };
1659   llvm::FunctionType *FnTy =
1660       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1661   return CGM.CreateRuntimeFunction(FnTy, Name);
1662 }
1663 
1664 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1665                                                             bool IVSigned) {
1666   assert((IVSize == 32 || IVSize == 64) &&
1667          "IV size is not compatible with the omp runtime");
1668   auto Name =
1669       IVSize == 32
1670           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1671           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1672   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1673   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1674                                CGM.Int32Ty,           // tid
1675                                CGM.Int32Ty,           // schedtype
1676                                ITy,                   // lower
1677                                ITy,                   // upper
1678                                ITy,                   // stride
1679                                ITy                    // chunk
1680   };
1681   llvm::FunctionType *FnTy =
1682       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1683   return CGM.CreateRuntimeFunction(FnTy, Name);
1684 }
1685 
1686 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1687                                                             bool IVSigned) {
1688   assert((IVSize == 32 || IVSize == 64) &&
1689          "IV size is not compatible with the omp runtime");
1690   auto Name =
1691       IVSize == 32
1692           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1693           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1694   llvm::Type *TypeParams[] = {
1695       getIdentTyPointerTy(), // loc
1696       CGM.Int32Ty,           // tid
1697   };
1698   llvm::FunctionType *FnTy =
1699       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1700   return CGM.CreateRuntimeFunction(FnTy, Name);
1701 }
1702 
1703 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1704                                                             bool IVSigned) {
1705   assert((IVSize == 32 || IVSize == 64) &&
1706          "IV size is not compatible with the omp runtime");
1707   auto Name =
1708       IVSize == 32
1709           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1710           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1711   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1712   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1713   llvm::Type *TypeParams[] = {
1714     getIdentTyPointerTy(),                     // loc
1715     CGM.Int32Ty,                               // tid
1716     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1717     PtrTy,                                     // p_lower
1718     PtrTy,                                     // p_upper
1719     PtrTy                                      // p_stride
1720   };
1721   llvm::FunctionType *FnTy =
1722       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1723   return CGM.CreateRuntimeFunction(FnTy, Name);
1724 }
1725 
1726 llvm::Constant *
1727 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1728   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1729          !CGM.getContext().getTargetInfo().isTLSSupported());
1730   // Lookup the entry, lazily creating it if necessary.
1731   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1732                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1733 }
1734 
1735 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1736                                                 const VarDecl *VD,
1737                                                 Address VDAddr,
1738                                                 SourceLocation Loc) {
1739   if (CGM.getLangOpts().OpenMPUseTLS &&
1740       CGM.getContext().getTargetInfo().isTLSSupported())
1741     return VDAddr;
1742 
1743   auto VarTy = VDAddr.getElementType();
1744   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1745                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1746                                                        CGM.Int8PtrTy),
1747                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1748                          getOrCreateThreadPrivateCache(VD)};
1749   return Address(CGF.EmitRuntimeCall(
1750       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1751                  VDAddr.getAlignment());
1752 }
1753 
1754 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1755     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1756     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1757   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1758   // library.
1759   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1760   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1761                       OMPLoc);
1762   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1763   // to register constructor/destructor for variable.
1764   llvm::Value *Args[] = {OMPLoc,
1765                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1766                                                        CGM.VoidPtrTy),
1767                          Ctor, CopyCtor, Dtor};
1768   CGF.EmitRuntimeCall(
1769       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1770 }
1771 
1772 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1773     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1774     bool PerformInit, CodeGenFunction *CGF) {
1775   if (CGM.getLangOpts().OpenMPUseTLS &&
1776       CGM.getContext().getTargetInfo().isTLSSupported())
1777     return nullptr;
1778 
1779   VD = VD->getDefinition(CGM.getContext());
1780   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1781     ThreadPrivateWithDefinition.insert(VD);
1782     QualType ASTTy = VD->getType();
1783 
1784     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1785     auto Init = VD->getAnyInitializer();
1786     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1787       // Generate function that re-emits the declaration's initializer into the
1788       // threadprivate copy of the variable VD
1789       CodeGenFunction CtorCGF(CGM);
1790       FunctionArgList Args;
1791       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1792                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1793       Args.push_back(&Dst);
1794 
1795       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1796           CGM.getContext().VoidPtrTy, Args);
1797       auto FTy = CGM.getTypes().GetFunctionType(FI);
1798       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1799           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1800       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1801                             Args, SourceLocation());
1802       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1803           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1804           CGM.getContext().VoidPtrTy, Dst.getLocation());
1805       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1806       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1807                                              CtorCGF.ConvertTypeForMem(ASTTy));
1808       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1809                                /*IsInitializer=*/true);
1810       ArgVal = CtorCGF.EmitLoadOfScalar(
1811           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1812           CGM.getContext().VoidPtrTy, Dst.getLocation());
1813       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1814       CtorCGF.FinishFunction();
1815       Ctor = Fn;
1816     }
1817     if (VD->getType().isDestructedType() != QualType::DK_none) {
1818       // Generate function that emits destructor call for the threadprivate copy
1819       // of the variable VD
1820       CodeGenFunction DtorCGF(CGM);
1821       FunctionArgList Args;
1822       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1823                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1824       Args.push_back(&Dst);
1825 
1826       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1827           CGM.getContext().VoidTy, Args);
1828       auto FTy = CGM.getTypes().GetFunctionType(FI);
1829       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1830           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1831       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1832       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1833                             SourceLocation());
1834       // Create a scope with an artificial location for the body of this function.
1835       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1836       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1837           DtorCGF.GetAddrOfLocalVar(&Dst),
1838           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1839       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1840                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1841                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1842       DtorCGF.FinishFunction();
1843       Dtor = Fn;
1844     }
1845     // Do not emit init function if it is not required.
1846     if (!Ctor && !Dtor)
1847       return nullptr;
1848 
1849     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1850     auto CopyCtorTy =
1851         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1852                                 /*isVarArg=*/false)->getPointerTo();
1853     // Copying constructor for the threadprivate variable.
1854     // Must be NULL - reserved by runtime, but currently it requires that this
1855     // parameter is always NULL. Otherwise it fires assertion.
1856     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1857     if (Ctor == nullptr) {
1858       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1859                                             /*isVarArg=*/false)->getPointerTo();
1860       Ctor = llvm::Constant::getNullValue(CtorTy);
1861     }
1862     if (Dtor == nullptr) {
1863       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1864                                             /*isVarArg=*/false)->getPointerTo();
1865       Dtor = llvm::Constant::getNullValue(DtorTy);
1866     }
1867     if (!CGF) {
1868       auto InitFunctionTy =
1869           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1870       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1871           InitFunctionTy, ".__omp_threadprivate_init_.",
1872           CGM.getTypes().arrangeNullaryFunction());
1873       CodeGenFunction InitCGF(CGM);
1874       FunctionArgList ArgList;
1875       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1876                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1877                             Loc);
1878       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1879       InitCGF.FinishFunction();
1880       return InitFunction;
1881     }
1882     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1883   }
1884   return nullptr;
1885 }
1886 
1887 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1888 /// function. Here is the logic:
1889 /// if (Cond) {
1890 ///   ThenGen();
1891 /// } else {
1892 ///   ElseGen();
1893 /// }
1894 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1895                             const RegionCodeGenTy &ThenGen,
1896                             const RegionCodeGenTy &ElseGen) {
1897   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1898 
1899   // If the condition constant folds and can be elided, try to avoid emitting
1900   // the condition and the dead arm of the if/else.
1901   bool CondConstant;
1902   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1903     if (CondConstant)
1904       ThenGen(CGF);
1905     else
1906       ElseGen(CGF);
1907     return;
1908   }
1909 
1910   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1911   // emit the conditional branch.
1912   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1913   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1914   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1915   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1916 
1917   // Emit the 'then' code.
1918   CGF.EmitBlock(ThenBlock);
1919   ThenGen(CGF);
1920   CGF.EmitBranch(ContBlock);
1921   // Emit the 'else' code if present.
1922   // There is no need to emit line number for unconditional branch.
1923   (void)ApplyDebugLocation::CreateEmpty(CGF);
1924   CGF.EmitBlock(ElseBlock);
1925   ElseGen(CGF);
1926   // There is no need to emit line number for unconditional branch.
1927   (void)ApplyDebugLocation::CreateEmpty(CGF);
1928   CGF.EmitBranch(ContBlock);
1929   // Emit the continuation block for code after the if.
1930   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1931 }
1932 
1933 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1934                                        llvm::Value *OutlinedFn,
1935                                        ArrayRef<llvm::Value *> CapturedVars,
1936                                        const Expr *IfCond) {
1937   if (!CGF.HaveInsertPoint())
1938     return;
1939   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1940   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1941                                                      PrePostActionTy &) {
1942     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1943     auto &RT = CGF.CGM.getOpenMPRuntime();
1944     llvm::Value *Args[] = {
1945         RTLoc,
1946         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1947         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1948     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1949     RealArgs.append(std::begin(Args), std::end(Args));
1950     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1951 
1952     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1953     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1954   };
1955   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1956                                                           PrePostActionTy &) {
1957     auto &RT = CGF.CGM.getOpenMPRuntime();
1958     auto ThreadID = RT.getThreadID(CGF, Loc);
1959     // Build calls:
1960     // __kmpc_serialized_parallel(&Loc, GTid);
1961     llvm::Value *Args[] = {RTLoc, ThreadID};
1962     CGF.EmitRuntimeCall(
1963         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1964 
1965     // OutlinedFn(&GTid, &zero, CapturedStruct);
1966     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1967     Address ZeroAddr =
1968         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1969                              /*Name*/ ".zero.addr");
1970     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1971     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1972     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1973     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1974     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1975     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1976 
1977     // __kmpc_end_serialized_parallel(&Loc, GTid);
1978     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1979     CGF.EmitRuntimeCall(
1980         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1981         EndArgs);
1982   };
1983   if (IfCond)
1984     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1985   else {
1986     RegionCodeGenTy ThenRCG(ThenGen);
1987     ThenRCG(CGF);
1988   }
1989 }
1990 
1991 // If we're inside an (outlined) parallel region, use the region info's
1992 // thread-ID variable (it is passed in a first argument of the outlined function
1993 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1994 // regular serial code region, get thread ID by calling kmp_int32
1995 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1996 // return the address of that temp.
1997 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1998                                              SourceLocation Loc) {
1999   if (auto *OMPRegionInfo =
2000           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2001     if (OMPRegionInfo->getThreadIDVariable())
2002       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2003 
2004   auto ThreadID = getThreadID(CGF, Loc);
2005   auto Int32Ty =
2006       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2007   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2008   CGF.EmitStoreOfScalar(ThreadID,
2009                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2010 
2011   return ThreadIDTemp;
2012 }
2013 
2014 llvm::Constant *
2015 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2016                                              const llvm::Twine &Name) {
2017   SmallString<256> Buffer;
2018   llvm::raw_svector_ostream Out(Buffer);
2019   Out << Name;
2020   auto RuntimeName = Out.str();
2021   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2022   if (Elem.second) {
2023     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2024            "OMP internal variable has different type than requested");
2025     return &*Elem.second;
2026   }
2027 
2028   return Elem.second = new llvm::GlobalVariable(
2029              CGM.getModule(), Ty, /*IsConstant*/ false,
2030              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2031              Elem.first());
2032 }
2033 
2034 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2035   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2036   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2037 }
2038 
2039 namespace {
2040 /// Common pre(post)-action for different OpenMP constructs.
2041 class CommonActionTy final : public PrePostActionTy {
2042   llvm::Value *EnterCallee;
2043   ArrayRef<llvm::Value *> EnterArgs;
2044   llvm::Value *ExitCallee;
2045   ArrayRef<llvm::Value *> ExitArgs;
2046   bool Conditional;
2047   llvm::BasicBlock *ContBlock = nullptr;
2048 
2049 public:
2050   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2051                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2052                  bool Conditional = false)
2053       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2054         ExitArgs(ExitArgs), Conditional(Conditional) {}
2055   void Enter(CodeGenFunction &CGF) override {
2056     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2057     if (Conditional) {
2058       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2059       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060       ContBlock = CGF.createBasicBlock("omp_if.end");
2061       // Generate the branch (If-stmt)
2062       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2063       CGF.EmitBlock(ThenBlock);
2064     }
2065   }
2066   void Done(CodeGenFunction &CGF) {
2067     // Emit the rest of blocks/branches
2068     CGF.EmitBranch(ContBlock);
2069     CGF.EmitBlock(ContBlock, true);
2070   }
2071   void Exit(CodeGenFunction &CGF) override {
2072     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2073   }
2074 };
2075 } // anonymous namespace
2076 
2077 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2078                                          StringRef CriticalName,
2079                                          const RegionCodeGenTy &CriticalOpGen,
2080                                          SourceLocation Loc, const Expr *Hint) {
2081   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2082   // CriticalOpGen();
2083   // __kmpc_end_critical(ident_t *, gtid, Lock);
2084   // Prepare arguments and build a call to __kmpc_critical
2085   if (!CGF.HaveInsertPoint())
2086     return;
2087   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2088                          getCriticalRegionLock(CriticalName)};
2089   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2090                                                 std::end(Args));
2091   if (Hint) {
2092     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2093         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2094   }
2095   CommonActionTy Action(
2096       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2097                                  : OMPRTL__kmpc_critical),
2098       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2099   CriticalOpGen.setAction(Action);
2100   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2101 }
2102 
2103 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2104                                        const RegionCodeGenTy &MasterOpGen,
2105                                        SourceLocation Loc) {
2106   if (!CGF.HaveInsertPoint())
2107     return;
2108   // if(__kmpc_master(ident_t *, gtid)) {
2109   //   MasterOpGen();
2110   //   __kmpc_end_master(ident_t *, gtid);
2111   // }
2112   // Prepare arguments and build a call to __kmpc_master
2113   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2114   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2115                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2116                         /*Conditional=*/true);
2117   MasterOpGen.setAction(Action);
2118   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2119   Action.Done(CGF);
2120 }
2121 
2122 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2123                                         SourceLocation Loc) {
2124   if (!CGF.HaveInsertPoint())
2125     return;
2126   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2127   llvm::Value *Args[] = {
2128       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2129       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2130   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2131   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2132     Region->emitUntiedSwitch(CGF);
2133 }
2134 
2135 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2136                                           const RegionCodeGenTy &TaskgroupOpGen,
2137                                           SourceLocation Loc) {
2138   if (!CGF.HaveInsertPoint())
2139     return;
2140   // __kmpc_taskgroup(ident_t *, gtid);
2141   // TaskgroupOpGen();
2142   // __kmpc_end_taskgroup(ident_t *, gtid);
2143   // Prepare arguments and build a call to __kmpc_taskgroup
2144   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2145   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2146                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2147                         Args);
2148   TaskgroupOpGen.setAction(Action);
2149   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2150 }
2151 
2152 /// Given an array of pointers to variables, project the address of a
2153 /// given variable.
2154 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2155                                       unsigned Index, const VarDecl *Var) {
2156   // Pull out the pointer to the variable.
2157   Address PtrAddr =
2158       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2159   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2160 
2161   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2162   Addr = CGF.Builder.CreateElementBitCast(
2163       Addr, CGF.ConvertTypeForMem(Var->getType()));
2164   return Addr;
2165 }
2166 
2167 static llvm::Value *emitCopyprivateCopyFunction(
2168     CodeGenModule &CGM, llvm::Type *ArgsType,
2169     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2170     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2171   auto &C = CGM.getContext();
2172   // void copy_func(void *LHSArg, void *RHSArg);
2173   FunctionArgList Args;
2174   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2175                            C.VoidPtrTy);
2176   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2177                            C.VoidPtrTy);
2178   Args.push_back(&LHSArg);
2179   Args.push_back(&RHSArg);
2180   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2181   auto *Fn = llvm::Function::Create(
2182       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2183       ".omp.copyprivate.copy_func", &CGM.getModule());
2184   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2185   CodeGenFunction CGF(CGM);
2186   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2187   // Dest = (void*[n])(LHSArg);
2188   // Src = (void*[n])(RHSArg);
2189   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2190       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2191       ArgsType), CGF.getPointerAlign());
2192   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2193       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2194       ArgsType), CGF.getPointerAlign());
2195   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2196   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2197   // ...
2198   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2199   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2200     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2201     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2202 
2203     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2204     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2205 
2206     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2207     QualType Type = VD->getType();
2208     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2209   }
2210   CGF.FinishFunction();
2211   return Fn;
2212 }
2213 
2214 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2215                                        const RegionCodeGenTy &SingleOpGen,
2216                                        SourceLocation Loc,
2217                                        ArrayRef<const Expr *> CopyprivateVars,
2218                                        ArrayRef<const Expr *> SrcExprs,
2219                                        ArrayRef<const Expr *> DstExprs,
2220                                        ArrayRef<const Expr *> AssignmentOps) {
2221   if (!CGF.HaveInsertPoint())
2222     return;
2223   assert(CopyprivateVars.size() == SrcExprs.size() &&
2224          CopyprivateVars.size() == DstExprs.size() &&
2225          CopyprivateVars.size() == AssignmentOps.size());
2226   auto &C = CGM.getContext();
2227   // int32 did_it = 0;
2228   // if(__kmpc_single(ident_t *, gtid)) {
2229   //   SingleOpGen();
2230   //   __kmpc_end_single(ident_t *, gtid);
2231   //   did_it = 1;
2232   // }
2233   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2234   // <copy_func>, did_it);
2235 
2236   Address DidIt = Address::invalid();
2237   if (!CopyprivateVars.empty()) {
2238     // int32 did_it = 0;
2239     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2240     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2241     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2242   }
2243   // Prepare arguments and build a call to __kmpc_single
2244   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2245   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2246                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2247                         /*Conditional=*/true);
2248   SingleOpGen.setAction(Action);
2249   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2250   if (DidIt.isValid()) {
2251     // did_it = 1;
2252     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2253   }
2254   Action.Done(CGF);
2255   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2256   // <copy_func>, did_it);
2257   if (DidIt.isValid()) {
2258     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2259     auto CopyprivateArrayTy =
2260         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2261                                /*IndexTypeQuals=*/0);
2262     // Create a list of all private variables for copyprivate.
2263     Address CopyprivateList =
2264         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2265     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2266       Address Elem = CGF.Builder.CreateConstArrayGEP(
2267           CopyprivateList, I, CGF.getPointerSize());
2268       CGF.Builder.CreateStore(
2269           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2270               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2271           Elem);
2272     }
2273     // Build function that copies private values from single region to all other
2274     // threads in the corresponding parallel region.
2275     auto *CpyFn = emitCopyprivateCopyFunction(
2276         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2277         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2278     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2279     Address CL =
2280       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2281                                                       CGF.VoidPtrTy);
2282     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2283     llvm::Value *Args[] = {
2284         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2285         getThreadID(CGF, Loc),        // i32 <gtid>
2286         BufSize,                      // size_t <buf_size>
2287         CL.getPointer(),              // void *<copyprivate list>
2288         CpyFn,                        // void (*) (void *, void *) <copy_func>
2289         DidItVal                      // i32 did_it
2290     };
2291     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2292   }
2293 }
2294 
2295 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2296                                         const RegionCodeGenTy &OrderedOpGen,
2297                                         SourceLocation Loc, bool IsThreads) {
2298   if (!CGF.HaveInsertPoint())
2299     return;
2300   // __kmpc_ordered(ident_t *, gtid);
2301   // OrderedOpGen();
2302   // __kmpc_end_ordered(ident_t *, gtid);
2303   // Prepare arguments and build a call to __kmpc_ordered
2304   if (IsThreads) {
2305     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2306     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2307                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2308                           Args);
2309     OrderedOpGen.setAction(Action);
2310     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2311     return;
2312   }
2313   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2314 }
2315 
2316 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2317                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2318                                       bool ForceSimpleCall) {
2319   if (!CGF.HaveInsertPoint())
2320     return;
2321   // Build call __kmpc_cancel_barrier(loc, thread_id);
2322   // Build call __kmpc_barrier(loc, thread_id);
2323   unsigned Flags;
2324   if (Kind == OMPD_for)
2325     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2326   else if (Kind == OMPD_sections)
2327     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2328   else if (Kind == OMPD_single)
2329     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2330   else if (Kind == OMPD_barrier)
2331     Flags = OMP_IDENT_BARRIER_EXPL;
2332   else
2333     Flags = OMP_IDENT_BARRIER_IMPL;
2334   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2335   // thread_id);
2336   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2337                          getThreadID(CGF, Loc)};
2338   if (auto *OMPRegionInfo =
2339           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2340     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2341       auto *Result = CGF.EmitRuntimeCall(
2342           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2343       if (EmitChecks) {
2344         // if (__kmpc_cancel_barrier()) {
2345         //   exit from construct;
2346         // }
2347         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2348         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2349         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2350         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2351         CGF.EmitBlock(ExitBB);
2352         //   exit from construct;
2353         auto CancelDestination =
2354             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2355         CGF.EmitBranchThroughCleanup(CancelDestination);
2356         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2357       }
2358       return;
2359     }
2360   }
2361   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2362 }
2363 
2364 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2365 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2366                                           bool Chunked, bool Ordered) {
2367   switch (ScheduleKind) {
2368   case OMPC_SCHEDULE_static:
2369     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2370                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2371   case OMPC_SCHEDULE_dynamic:
2372     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2373   case OMPC_SCHEDULE_guided:
2374     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2375   case OMPC_SCHEDULE_runtime:
2376     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2377   case OMPC_SCHEDULE_auto:
2378     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2379   case OMPC_SCHEDULE_unknown:
2380     assert(!Chunked && "chunk was specified but schedule kind not known");
2381     return Ordered ? OMP_ord_static : OMP_sch_static;
2382   }
2383   llvm_unreachable("Unexpected runtime schedule");
2384 }
2385 
2386 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2387 static OpenMPSchedType
2388 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2389   // only static is allowed for dist_schedule
2390   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2391 }
2392 
2393 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2394                                          bool Chunked) const {
2395   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2396   return Schedule == OMP_sch_static;
2397 }
2398 
2399 bool CGOpenMPRuntime::isStaticNonchunked(
2400     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2401   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2402   return Schedule == OMP_dist_sch_static;
2403 }
2404 
2405 
2406 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2407   auto Schedule =
2408       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2409   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2410   return Schedule != OMP_sch_static;
2411 }
2412 
2413 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2414                                   OpenMPScheduleClauseModifier M1,
2415                                   OpenMPScheduleClauseModifier M2) {
2416   int Modifier = 0;
2417   switch (M1) {
2418   case OMPC_SCHEDULE_MODIFIER_monotonic:
2419     Modifier = OMP_sch_modifier_monotonic;
2420     break;
2421   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2422     Modifier = OMP_sch_modifier_nonmonotonic;
2423     break;
2424   case OMPC_SCHEDULE_MODIFIER_simd:
2425     if (Schedule == OMP_sch_static_chunked)
2426       Schedule = OMP_sch_static_balanced_chunked;
2427     break;
2428   case OMPC_SCHEDULE_MODIFIER_last:
2429   case OMPC_SCHEDULE_MODIFIER_unknown:
2430     break;
2431   }
2432   switch (M2) {
2433   case OMPC_SCHEDULE_MODIFIER_monotonic:
2434     Modifier = OMP_sch_modifier_monotonic;
2435     break;
2436   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2437     Modifier = OMP_sch_modifier_nonmonotonic;
2438     break;
2439   case OMPC_SCHEDULE_MODIFIER_simd:
2440     if (Schedule == OMP_sch_static_chunked)
2441       Schedule = OMP_sch_static_balanced_chunked;
2442     break;
2443   case OMPC_SCHEDULE_MODIFIER_last:
2444   case OMPC_SCHEDULE_MODIFIER_unknown:
2445     break;
2446   }
2447   return Schedule | Modifier;
2448 }
2449 
2450 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2451                                           SourceLocation Loc,
2452                                           const OpenMPScheduleTy &ScheduleKind,
2453                                           unsigned IVSize, bool IVSigned,
2454                                           bool Ordered, llvm::Value *UB,
2455                                           llvm::Value *Chunk) {
2456   if (!CGF.HaveInsertPoint())
2457     return;
2458   OpenMPSchedType Schedule =
2459       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2460   assert(Ordered ||
2461          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2462           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2463           Schedule != OMP_sch_static_balanced_chunked));
2464   // Call __kmpc_dispatch_init(
2465   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2466   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2467   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2468 
2469   // If the Chunk was not specified in the clause - use default value 1.
2470   if (Chunk == nullptr)
2471     Chunk = CGF.Builder.getIntN(IVSize, 1);
2472   llvm::Value *Args[] = {
2473       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2474       CGF.Builder.getInt32(addMonoNonMonoModifier(
2475           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2476       CGF.Builder.getIntN(IVSize, 0),                   // Lower
2477       UB,                                               // Upper
2478       CGF.Builder.getIntN(IVSize, 1),                   // Stride
2479       Chunk                                             // Chunk
2480   };
2481   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2482 }
2483 
2484 static void emitForStaticInitCall(
2485     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2486     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2487     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2488     unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2489     Address ST, llvm::Value *Chunk) {
2490   if (!CGF.HaveInsertPoint())
2491      return;
2492 
2493    assert(!Ordered);
2494    assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2495           Schedule == OMP_sch_static_balanced_chunked ||
2496           Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2497           Schedule == OMP_dist_sch_static ||
2498           Schedule == OMP_dist_sch_static_chunked);
2499 
2500    // Call __kmpc_for_static_init(
2501    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2502    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2503    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2504    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2505    if (Chunk == nullptr) {
2506      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2507              Schedule == OMP_dist_sch_static) &&
2508             "expected static non-chunked schedule");
2509      // If the Chunk was not specified in the clause - use default value 1.
2510        Chunk = CGF.Builder.getIntN(IVSize, 1);
2511    } else {
2512      assert((Schedule == OMP_sch_static_chunked ||
2513              Schedule == OMP_sch_static_balanced_chunked ||
2514              Schedule == OMP_ord_static_chunked ||
2515              Schedule == OMP_dist_sch_static_chunked) &&
2516             "expected static chunked schedule");
2517    }
2518    llvm::Value *Args[] = {
2519        UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2520                                      Schedule, M1, M2)), // Schedule type
2521        IL.getPointer(),                                  // &isLastIter
2522        LB.getPointer(),                                  // &LB
2523        UB.getPointer(),                                  // &UB
2524        ST.getPointer(),                                  // &Stride
2525        CGF.Builder.getIntN(IVSize, 1),                   // Incr
2526        Chunk                                             // Chunk
2527    };
2528    CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2529 }
2530 
2531 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2532                                         SourceLocation Loc,
2533                                         const OpenMPScheduleTy &ScheduleKind,
2534                                         unsigned IVSize, bool IVSigned,
2535                                         bool Ordered, Address IL, Address LB,
2536                                         Address UB, Address ST,
2537                                         llvm::Value *Chunk) {
2538   OpenMPSchedType ScheduleNum =
2539       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2540   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2541   auto *ThreadId = getThreadID(CGF, Loc);
2542   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2543   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2544                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
2545                         Ordered, IL, LB, UB, ST, Chunk);
2546 }
2547 
2548 void CGOpenMPRuntime::emitDistributeStaticInit(
2549     CodeGenFunction &CGF, SourceLocation Loc,
2550     OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
2551     bool Ordered, Address IL, Address LB, Address UB, Address ST,
2552     llvm::Value *Chunk) {
2553   OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2554   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2555   auto *ThreadId = getThreadID(CGF, Loc);
2556   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2557   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2558                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2559                         OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
2560                         UB, ST, Chunk);
2561 }
2562 
2563 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2564                                           SourceLocation Loc) {
2565   if (!CGF.HaveInsertPoint())
2566     return;
2567   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2568   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2569   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2570                       Args);
2571 }
2572 
2573 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2574                                                  SourceLocation Loc,
2575                                                  unsigned IVSize,
2576                                                  bool IVSigned) {
2577   if (!CGF.HaveInsertPoint())
2578     return;
2579   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2580   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2581   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2582 }
2583 
2584 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2585                                           SourceLocation Loc, unsigned IVSize,
2586                                           bool IVSigned, Address IL,
2587                                           Address LB, Address UB,
2588                                           Address ST) {
2589   // Call __kmpc_dispatch_next(
2590   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2591   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2592   //          kmp_int[32|64] *p_stride);
2593   llvm::Value *Args[] = {
2594       emitUpdateLocation(CGF, Loc),
2595       getThreadID(CGF, Loc),
2596       IL.getPointer(), // &isLastIter
2597       LB.getPointer(), // &Lower
2598       UB.getPointer(), // &Upper
2599       ST.getPointer()  // &Stride
2600   };
2601   llvm::Value *Call =
2602       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2603   return CGF.EmitScalarConversion(
2604       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2605       CGF.getContext().BoolTy, Loc);
2606 }
2607 
2608 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2609                                            llvm::Value *NumThreads,
2610                                            SourceLocation Loc) {
2611   if (!CGF.HaveInsertPoint())
2612     return;
2613   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2614   llvm::Value *Args[] = {
2615       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2616       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2617   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2618                       Args);
2619 }
2620 
2621 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2622                                          OpenMPProcBindClauseKind ProcBind,
2623                                          SourceLocation Loc) {
2624   if (!CGF.HaveInsertPoint())
2625     return;
2626   // Constants for proc bind value accepted by the runtime.
2627   enum ProcBindTy {
2628     ProcBindFalse = 0,
2629     ProcBindTrue,
2630     ProcBindMaster,
2631     ProcBindClose,
2632     ProcBindSpread,
2633     ProcBindIntel,
2634     ProcBindDefault
2635   } RuntimeProcBind;
2636   switch (ProcBind) {
2637   case OMPC_PROC_BIND_master:
2638     RuntimeProcBind = ProcBindMaster;
2639     break;
2640   case OMPC_PROC_BIND_close:
2641     RuntimeProcBind = ProcBindClose;
2642     break;
2643   case OMPC_PROC_BIND_spread:
2644     RuntimeProcBind = ProcBindSpread;
2645     break;
2646   case OMPC_PROC_BIND_unknown:
2647     llvm_unreachable("Unsupported proc_bind value.");
2648   }
2649   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2650   llvm::Value *Args[] = {
2651       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2652       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2653   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2654 }
2655 
2656 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2657                                 SourceLocation Loc) {
2658   if (!CGF.HaveInsertPoint())
2659     return;
2660   // Build call void __kmpc_flush(ident_t *loc)
2661   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2662                       emitUpdateLocation(CGF, Loc));
2663 }
2664 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// \brief Index of the list of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Index of the task routine.
  KmpTaskTRoutine,
  /// \brief Index of the partition id used by untied tasks.
  KmpTaskTPartId,
  /// \brief Index of the function that calls the destructors of the private
  /// variables.
  Data1,
  /// \brief Index of the task priority.
  Data2,
  /// \brief Index of the lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// \brief Index of the upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// \brief Index of the stride (taskloops only).
  KmpTaskTStride,
  /// \brief Index of the is-last-iteration flag (taskloops only).
  KmpTaskTLastIter,
};
} // anonymous namespace
2688 
2689 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2690   // FIXME: Add other entries type when they become supported.
2691   return OffloadEntriesTargetRegion.empty();
2692 }
2693 
2694 /// \brief Initialize target region entry.
2695 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2696     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2697                                     StringRef ParentName, unsigned LineNum,
2698                                     unsigned Order) {
2699   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2700                                              "only required for the device "
2701                                              "code generation.");
2702   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2703       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2704   ++OffloadingEntriesNum;
2705 }
2706 
2707 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2708     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2709                                   StringRef ParentName, unsigned LineNum,
2710                                   llvm::Constant *Addr, llvm::Constant *ID) {
2711   // If we are emitting code for a target, the entry is already initialized,
2712   // only has to be registered.
2713   if (CGM.getLangOpts().OpenMPIsDevice) {
2714     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2715            "Entry must exist.");
2716     auto &Entry =
2717         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2718     assert(Entry.isValid() && "Entry not initialized!");
2719     Entry.setAddress(Addr);
2720     Entry.setID(ID);
2721     return;
2722   } else {
2723     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2724     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2725   }
2726 }
2727 
2728 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2729     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2730     unsigned LineNum) const {
2731   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2732   if (PerDevice == OffloadEntriesTargetRegion.end())
2733     return false;
2734   auto PerFile = PerDevice->second.find(FileID);
2735   if (PerFile == PerDevice->second.end())
2736     return false;
2737   auto PerParentName = PerFile->second.find(ParentName);
2738   if (PerParentName == PerFile->second.end())
2739     return false;
2740   auto PerLine = PerParentName->second.find(LineNum);
2741   if (PerLine == PerParentName->second.end())
2742     return false;
2743   // Fail if this entry is already registered.
2744   if (PerLine->second.getAddress() || PerLine->second.getID())
2745     return false;
2746   return true;
2747 }
2748 
2749 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2750     const OffloadTargetRegionEntryInfoActTy &Action) {
2751   // Scan all target region entries and perform the provided action.
2752   for (auto &D : OffloadEntriesTargetRegion)
2753     for (auto &F : D.second)
2754       for (auto &P : F.second)
2755         for (auto &L : P.second)
2756           Action(D.first, F.first, P.first(), L.first, L.second);
2757 }
2758 
2759 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2760 /// \a Codegen. This is used to emit the two functions that register and
2761 /// unregister the descriptor of the current compilation unit.
2762 static llvm::Function *
2763 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2764                                          const RegionCodeGenTy &Codegen) {
2765   auto &C = CGM.getContext();
2766   FunctionArgList Args;
2767   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2768                              /*Id=*/nullptr, C.VoidPtrTy);
2769   Args.push_back(&DummyPtr);
2770 
2771   CodeGenFunction CGF(CGM);
2772   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2773   auto FTy = CGM.getTypes().GetFunctionType(FI);
2774   auto *Fn =
2775       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2776   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2777   Codegen(CGF);
2778   CGF.FinishFunction();
2779   return Fn;
2780 }
2781 
2782 llvm::Function *
2783 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2784 
2785   // If we don't have entries or if we are emitting code for the device, we
2786   // don't need to do anything.
2787   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2788     return nullptr;
2789 
2790   auto &M = CGM.getModule();
2791   auto &C = CGM.getContext();
2792 
2793   // Get list of devices we care about
2794   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2795 
2796   // We should be creating an offloading descriptor only if there are devices
2797   // specified.
2798   assert(!Devices.empty() && "No OpenMP offloading devices??");
2799 
2800   // Create the external variables that will point to the begin and end of the
2801   // host entries section. These will be defined by the linker.
2802   auto *OffloadEntryTy =
2803       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2804   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2805       M, OffloadEntryTy, /*isConstant=*/true,
2806       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2807       ".omp_offloading.entries_begin");
2808   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2809       M, OffloadEntryTy, /*isConstant=*/true,
2810       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2811       ".omp_offloading.entries_end");
2812 
2813   // Create all device images
2814   auto *DeviceImageTy = cast<llvm::StructType>(
2815       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2816   ConstantInitBuilder DeviceImagesBuilder(CGM);
2817   auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
2818 
2819   for (unsigned i = 0; i < Devices.size(); ++i) {
2820     StringRef T = Devices[i].getTriple();
2821     auto *ImgBegin = new llvm::GlobalVariable(
2822         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2823         /*Initializer=*/nullptr,
2824         Twine(".omp_offloading.img_start.") + Twine(T));
2825     auto *ImgEnd = new llvm::GlobalVariable(
2826         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2827         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2828 
2829     auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
2830     Dev.add(ImgBegin);
2831     Dev.add(ImgEnd);
2832     Dev.add(HostEntriesBegin);
2833     Dev.add(HostEntriesEnd);
2834     Dev.finishAndAddTo(DeviceImagesEntries);
2835   }
2836 
2837   // Create device images global array.
2838   llvm::GlobalVariable *DeviceImages =
2839     DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
2840                                               CGM.getPointerAlign(),
2841                                               /*isConstant=*/true);
2842   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2843 
2844   // This is a Zero array to be used in the creation of the constant expressions
2845   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2846                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2847 
2848   // Create the target region descriptor.
2849   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2850       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2851   ConstantInitBuilder DescBuilder(CGM);
2852   auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
2853   DescInit.addInt(CGM.Int32Ty, Devices.size());
2854   DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
2855                                                     DeviceImages,
2856                                                     Index));
2857   DescInit.add(HostEntriesBegin);
2858   DescInit.add(HostEntriesEnd);
2859 
2860   auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
2861                                               CGM.getPointerAlign(),
2862                                               /*isConstant=*/true);
2863 
2864   // Emit code to register or unregister the descriptor at execution
2865   // startup or closing, respectively.
2866 
2867   // Create a variable to drive the registration and unregistration of the
2868   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2869   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2870   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2871                                 IdentInfo, C.CharTy);
2872 
2873   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2874       CGM, ".omp_offloading.descriptor_unreg",
2875       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2876         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2877                              Desc);
2878       });
2879   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2880       CGM, ".omp_offloading.descriptor_reg",
2881       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2882         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2883                              Desc);
2884         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2885       });
2886   return RegFn;
2887 }
2888 
2889 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2890                                          llvm::Constant *Addr, uint64_t Size) {
2891   StringRef Name = Addr->getName();
2892   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2893       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2894   llvm::LLVMContext &C = CGM.getModule().getContext();
2895   llvm::Module &M = CGM.getModule();
2896 
2897   // Make sure the address has the right type.
2898   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2899 
2900   // Create constant string with the name.
2901   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2902 
2903   llvm::GlobalVariable *Str =
2904       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2905                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2906                                ".omp_offloading.entry_name");
2907   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2908   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2909 
2910   // We can't have any padding between symbols, so we need to have 1-byte
2911   // alignment.
2912   auto Align = CharUnits::fromQuantity(1);
2913 
2914   // Create the entry struct.
2915   ConstantInitBuilder EntryBuilder(CGM);
2916   auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
2917   EntryInit.add(AddrPtr);
2918   EntryInit.add(StrPtr);
2919   EntryInit.addInt(CGM.SizeTy, Size);
2920   llvm::GlobalVariable *Entry =
2921     EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
2922                                     Align,
2923                                     /*constant*/ true,
2924                                     llvm::GlobalValue::ExternalLinkage);
2925 
2926   // The entry has to be created in the section the linker expects it to be.
2927   Entry->setSection(".omp_offloading.entries");
2928 }
2929 
2930 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2931   // Emit the offloading entries and metadata so that the device codegen side
2932   // can easily figure out what to emit. The produced metadata looks like
2933   // this:
2934   //
2935   // !omp_offload.info = !{!1, ...}
2936   //
2937   // Right now we only generate metadata for function that contain target
2938   // regions.
2939 
2940   // If we do not have entries, we dont need to do anything.
2941   if (OffloadEntriesInfoManager.empty())
2942     return;
2943 
2944   llvm::Module &M = CGM.getModule();
2945   llvm::LLVMContext &C = M.getContext();
2946   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2947       OrderedEntries(OffloadEntriesInfoManager.size());
2948 
2949   // Create the offloading info metadata node.
2950   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2951 
2952   // Auxiliar methods to create metadata values and strings.
2953   auto getMDInt = [&](unsigned v) {
2954     return llvm::ConstantAsMetadata::get(
2955         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2956   };
2957 
2958   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2959 
2960   // Create function that emits metadata for each target region entry;
2961   auto &&TargetRegionMetadataEmitter = [&](
2962       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2963       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2964     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2965     // Generate metadata for target regions. Each entry of this metadata
2966     // contains:
2967     // - Entry 0 -> Kind of this type of metadata (0).
2968     // - Entry 1 -> Device ID of the file where the entry was identified.
2969     // - Entry 2 -> File ID of the file where the entry was identified.
2970     // - Entry 3 -> Mangled name of the function where the entry was identified.
2971     // - Entry 4 -> Line in the file where the entry was identified.
2972     // - Entry 5 -> Order the entry was created.
2973     // The first element of the metadata node is the kind.
2974     Ops.push_back(getMDInt(E.getKind()));
2975     Ops.push_back(getMDInt(DeviceID));
2976     Ops.push_back(getMDInt(FileID));
2977     Ops.push_back(getMDString(ParentName));
2978     Ops.push_back(getMDInt(Line));
2979     Ops.push_back(getMDInt(E.getOrder()));
2980 
2981     // Save this entry in the right position of the ordered entries array.
2982     OrderedEntries[E.getOrder()] = &E;
2983 
2984     // Add metadata to the named metadata node.
2985     MD->addOperand(llvm::MDNode::get(C, Ops));
2986   };
2987 
2988   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2989       TargetRegionMetadataEmitter);
2990 
2991   for (auto *E : OrderedEntries) {
2992     assert(E && "All ordered entries must exist!");
2993     if (auto *CE =
2994             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2995                 E)) {
2996       assert(CE->getID() && CE->getAddress() &&
2997              "Entry ID and Addr are invalid!");
2998       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
2999     } else
3000       llvm_unreachable("Unsupported entry kind.");
3001   }
3002 }
3003 
3004 /// \brief Loads all the offload entries information from the host IR
3005 /// metadata.
3006 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3007   // If we are in target mode, load the metadata from the host IR. This code has
3008   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3009 
3010   if (!CGM.getLangOpts().OpenMPIsDevice)
3011     return;
3012 
3013   if (CGM.getLangOpts().OMPHostIRFile.empty())
3014     return;
3015 
3016   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3017   if (Buf.getError())
3018     return;
3019 
3020   llvm::LLVMContext C;
3021   auto ME = expectedToErrorOrAndEmitErrors(
3022       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3023 
3024   if (ME.getError())
3025     return;
3026 
3027   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3028   if (!MD)
3029     return;
3030 
3031   for (auto I : MD->operands()) {
3032     llvm::MDNode *MN = cast<llvm::MDNode>(I);
3033 
3034     auto getMDInt = [&](unsigned Idx) {
3035       llvm::ConstantAsMetadata *V =
3036           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3037       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3038     };
3039 
3040     auto getMDString = [&](unsigned Idx) {
3041       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3042       return V->getString();
3043     };
3044 
3045     switch (getMDInt(0)) {
3046     default:
3047       llvm_unreachable("Unexpected metadata!");
3048       break;
3049     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3050         OFFLOAD_ENTRY_INFO_TARGET_REGION:
3051       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3052           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3053           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3054           /*Order=*/getMDInt(5));
3055       break;
3056     }
3057   }
3058 }
3059 
3060 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3061   if (!KmpRoutineEntryPtrTy) {
3062     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3063     auto &C = CGM.getContext();
3064     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3065     FunctionProtoType::ExtProtoInfo EPI;
3066     KmpRoutineEntryPtrQTy = C.getPointerType(
3067         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3068     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3069   }
3070 }
3071 
3072 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3073                                        QualType FieldTy) {
3074   auto *Field = FieldDecl::Create(
3075       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3076       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3077       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3078   Field->setAccess(AS_public);
3079   DC->addDecl(Field);
3080   return Field;
3081 }
3082 
3083 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3084 
3085   // Make sure the type of the entry is already created. This is the type we
3086   // have to create:
3087   // struct __tgt_offload_entry{
3088   //   void      *addr;       // Pointer to the offload entry info.
3089   //                          // (function or global)
3090   //   char      *name;       // Name of the function or global.
3091   //   size_t     size;       // Size of the entry info (0 if it a function).
3092   // };
3093   if (TgtOffloadEntryQTy.isNull()) {
3094     ASTContext &C = CGM.getContext();
3095     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3096     RD->startDefinition();
3097     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3098     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3099     addFieldToRecordDecl(C, RD, C.getSizeType());
3100     RD->completeDefinition();
3101     TgtOffloadEntryQTy = C.getRecordType(RD);
3102   }
3103   return TgtOffloadEntryQTy;
3104 }
3105 
3106 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3107   // These are the types we need to build:
3108   // struct __tgt_device_image{
3109   // void   *ImageStart;       // Pointer to the target code start.
3110   // void   *ImageEnd;         // Pointer to the target code end.
3111   // // We also add the host entries to the device image, as it may be useful
3112   // // for the target runtime to have access to that information.
3113   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3114   //                                       // the entries.
3115   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3116   //                                       // entries (non inclusive).
3117   // };
3118   if (TgtDeviceImageQTy.isNull()) {
3119     ASTContext &C = CGM.getContext();
3120     auto *RD = C.buildImplicitRecord("__tgt_device_image");
3121     RD->startDefinition();
3122     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3123     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3124     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3125     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3126     RD->completeDefinition();
3127     TgtDeviceImageQTy = C.getRecordType(RD);
3128   }
3129   return TgtDeviceImageQTy;
3130 }
3131 
3132 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3133   // struct __tgt_bin_desc{
3134   //   int32_t              NumDevices;      // Number of devices supported.
3135   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
3136   //                                         // (one per device).
3137   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3138   //                                         // entries.
3139   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3140   //                                         // entries (non inclusive).
3141   // };
3142   if (TgtBinaryDescriptorQTy.isNull()) {
3143     ASTContext &C = CGM.getContext();
3144     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3145     RD->startDefinition();
3146     addFieldToRecordDecl(
3147         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3148     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3149     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3150     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3151     RD->completeDefinition();
3152     TgtBinaryDescriptorQTy = C.getRecordType(RD);
3153   }
3154   return TgtBinaryDescriptorQTy;
3155 }
3156 
3157 namespace {
3158 struct PrivateHelpersTy {
3159   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3160                    const VarDecl *PrivateElemInit)
3161       : Original(Original), PrivateCopy(PrivateCopy),
3162         PrivateElemInit(PrivateElemInit) {}
3163   const VarDecl *Original;
3164   const VarDecl *PrivateCopy;
3165   const VarDecl *PrivateElemInit;
3166 };
3167 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3168 } // anonymous namespace
3169 
3170 static RecordDecl *
3171 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3172   if (!Privates.empty()) {
3173     auto &C = CGM.getContext();
3174     // Build struct .kmp_privates_t. {
3175     //         /*  private vars  */
3176     //       };
3177     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3178     RD->startDefinition();
3179     for (auto &&Pair : Privates) {
3180       auto *VD = Pair.second.Original;
3181       auto Type = VD->getType();
3182       Type = Type.getNonReferenceType();
3183       auto *FD = addFieldToRecordDecl(C, RD, Type);
3184       if (VD->hasAttrs()) {
3185         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3186              E(VD->getAttrs().end());
3187              I != E; ++I)
3188           FD->addAttr(*I);
3189       }
3190     }
3191     RD->completeDefinition();
3192     return RD;
3193   }
3194   return nullptr;
3195 }
3196 
3197 static RecordDecl *
3198 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3199                          QualType KmpInt32Ty,
3200                          QualType KmpRoutineEntryPointerQTy) {
3201   auto &C = CGM.getContext();
3202   // Build struct kmp_task_t {
3203   //         void *              shareds;
3204   //         kmp_routine_entry_t routine;
3205   //         kmp_int32           part_id;
3206   //         kmp_cmplrdata_t data1;
3207   //         kmp_cmplrdata_t data2;
3208   // For taskloops additional fields:
3209   //         kmp_uint64          lb;
3210   //         kmp_uint64          ub;
3211   //         kmp_int64           st;
3212   //         kmp_int32           liter;
3213   //       };
3214   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3215   UD->startDefinition();
3216   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3217   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3218   UD->completeDefinition();
3219   QualType KmpCmplrdataTy = C.getRecordType(UD);
3220   auto *RD = C.buildImplicitRecord("kmp_task_t");
3221   RD->startDefinition();
3222   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3223   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3224   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3225   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3226   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3227   if (isOpenMPTaskLoopDirective(Kind)) {
3228     QualType KmpUInt64Ty =
3229         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3230     QualType KmpInt64Ty =
3231         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3232     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3233     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3234     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3235     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3236   }
3237   RD->completeDefinition();
3238   return RD;
3239 }
3240 
3241 static RecordDecl *
3242 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3243                                      ArrayRef<PrivateDataTy> Privates) {
3244   auto &C = CGM.getContext();
3245   // Build struct kmp_task_t_with_privates {
3246   //         kmp_task_t task_data;
3247   //         .kmp_privates_t. privates;
3248   //       };
3249   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3250   RD->startDefinition();
3251   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3252   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3253     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3254   }
3255   RD->completeDefinition();
3256   return RD;
3257 }
3258 
3259 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3260 /// argument.
3261 /// \code
3262 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3263 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3264 ///   For taskloops:
3265 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3266 ///   tt->shareds);
3267 ///   return 0;
3268 /// }
3269 /// \endcode
3270 static llvm::Value *
3271 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3272                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3273                       QualType KmpTaskTWithPrivatesPtrQTy,
3274                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3275                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
3276                       llvm::Value *TaskPrivatesMap) {
3277   auto &C = CGM.getContext();
3278   FunctionArgList Args;
3279   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3280   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3281                                 /*Id=*/nullptr,
3282                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3283   Args.push_back(&GtidArg);
3284   Args.push_back(&TaskTypeArg);
3285   auto &TaskEntryFnInfo =
3286       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3287   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3288   auto *TaskEntry =
3289       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
3290                              ".omp_task_entry.", &CGM.getModule());
3291   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3292   CodeGenFunction CGF(CGM);
3293   CGF.disableDebugInfo();
3294   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3295 
3296   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3297   // tt,
3298   // For taskloops:
3299   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3300   // tt->task_data.shareds);
3301   auto *GtidParam = CGF.EmitLoadOfScalar(
3302       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3303   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3304       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3305       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3306   auto *KmpTaskTWithPrivatesQTyRD =
3307       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3308   LValue Base =
3309       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3310   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3311   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3312   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3313   auto *PartidParam = PartIdLVal.getPointer();
3314 
3315   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3316   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3317   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3318       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3319       CGF.ConvertTypeForMem(SharedsPtrTy));
3320 
3321   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3322   llvm::Value *PrivatesParam;
3323   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3324     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3325     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3326         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3327   } else
3328     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3329 
3330   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3331                                TaskPrivatesMap,
3332                                CGF.Builder
3333                                    .CreatePointerBitCastOrAddrSpaceCast(
3334                                        TDBase.getAddress(), CGF.VoidPtrTy)
3335                                    .getPointer()};
3336   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3337                                           std::end(CommonArgs));
3338   if (isOpenMPTaskLoopDirective(Kind)) {
3339     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3340     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3341     auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3342     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3343     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3344     auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3345     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3346     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3347     auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3348     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3349     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3350     auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3351     CallArgs.push_back(LBParam);
3352     CallArgs.push_back(UBParam);
3353     CallArgs.push_back(StParam);
3354     CallArgs.push_back(LIParam);
3355   }
3356   CallArgs.push_back(SharedsParam);
3357 
3358   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3359   CGF.EmitStoreThroughLValue(
3360       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3361       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3362   CGF.FinishFunction();
3363   return TaskEntry;
3364 }
3365 
3366 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3367                                             SourceLocation Loc,
3368                                             QualType KmpInt32Ty,
3369                                             QualType KmpTaskTWithPrivatesPtrQTy,
3370                                             QualType KmpTaskTWithPrivatesQTy) {
3371   auto &C = CGM.getContext();
3372   FunctionArgList Args;
3373   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3374   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3375                                 /*Id=*/nullptr,
3376                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3377   Args.push_back(&GtidArg);
3378   Args.push_back(&TaskTypeArg);
3379   FunctionType::ExtInfo Info;
3380   auto &DestructorFnInfo =
3381       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3382   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3383   auto *DestructorFn =
3384       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3385                              ".omp_task_destructor.", &CGM.getModule());
3386   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3387                                     DestructorFnInfo);
3388   CodeGenFunction CGF(CGM);
3389   CGF.disableDebugInfo();
3390   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3391                     Args);
3392 
3393   LValue Base = CGF.EmitLoadOfPointerLValue(
3394       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3395       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3396   auto *KmpTaskTWithPrivatesQTyRD =
3397       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3398   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3399   Base = CGF.EmitLValueForField(Base, *FI);
3400   for (auto *Field :
3401        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3402     if (auto DtorKind = Field->getType().isDestructedType()) {
3403       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3404       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3405     }
3406   }
3407   CGF.FinishFunction();
3408   return DestructorFn;
3409 }
3410 
3411 /// \brief Emit a privates mapping function for correct handling of private and
3412 /// firstprivate variables.
3413 /// \code
3414 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3415 /// **noalias priv1,...,  <tyn> **noalias privn) {
3416 ///   *priv1 = &.privates.priv1;
3417 ///   ...;
3418 ///   *privn = &.privates.privn;
3419 /// }
3420 /// \endcode
3421 static llvm::Value *
3422 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3423                                ArrayRef<const Expr *> PrivateVars,
3424                                ArrayRef<const Expr *> FirstprivateVars,
3425                                ArrayRef<const Expr *> LastprivateVars,
3426                                QualType PrivatesQTy,
3427                                ArrayRef<PrivateDataTy> Privates) {
3428   auto &C = CGM.getContext();
3429   FunctionArgList Args;
3430   ImplicitParamDecl TaskPrivatesArg(
3431       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3432       C.getPointerType(PrivatesQTy).withConst().withRestrict());
3433   Args.push_back(&TaskPrivatesArg);
3434   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3435   unsigned Counter = 1;
3436   for (auto *E: PrivateVars) {
3437     Args.push_back(ImplicitParamDecl::Create(
3438         C, /*DC=*/nullptr, Loc,
3439         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3440                             .withConst()
3441                             .withRestrict()));
3442     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3443     PrivateVarsPos[VD] = Counter;
3444     ++Counter;
3445   }
3446   for (auto *E : FirstprivateVars) {
3447     Args.push_back(ImplicitParamDecl::Create(
3448         C, /*DC=*/nullptr, Loc,
3449         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3450                             .withConst()
3451                             .withRestrict()));
3452     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3453     PrivateVarsPos[VD] = Counter;
3454     ++Counter;
3455   }
3456   for (auto *E: LastprivateVars) {
3457     Args.push_back(ImplicitParamDecl::Create(
3458         C, /*DC=*/nullptr, Loc,
3459         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3460                             .withConst()
3461                             .withRestrict()));
3462     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3463     PrivateVarsPos[VD] = Counter;
3464     ++Counter;
3465   }
3466   auto &TaskPrivatesMapFnInfo =
3467       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3468   auto *TaskPrivatesMapTy =
3469       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3470   auto *TaskPrivatesMap = llvm::Function::Create(
3471       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3472       ".omp_task_privates_map.", &CGM.getModule());
3473   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3474                                     TaskPrivatesMapFnInfo);
3475   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3476   CodeGenFunction CGF(CGM);
3477   CGF.disableDebugInfo();
3478   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3479                     TaskPrivatesMapFnInfo, Args);
3480 
3481   // *privi = &.privates.privi;
3482   LValue Base = CGF.EmitLoadOfPointerLValue(
3483       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3484       TaskPrivatesArg.getType()->castAs<PointerType>());
3485   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3486   Counter = 0;
3487   for (auto *Field : PrivatesQTyRD->fields()) {
3488     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3489     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3490     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3491     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3492         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3493     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3494     ++Counter;
3495   }
3496   CGF.FinishFunction();
3497   return TaskPrivatesMap;
3498 }
3499 
3500 static int array_pod_sort_comparator(const PrivateDataTy *P1,
3501                                      const PrivateDataTy *P2) {
3502   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3503 }
3504 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \a Privates together with the fields of the privates record (the
/// second field of \a KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the corresponding field.
///
/// \param CGF Function in which the initialization code is emitted.
/// \param D Directive whose associated captured statement is used to look up
/// the captured field of the original of a firstprivate variable.
/// \param KmpTaskSharedsPtr Address of the shareds record; it is only used
/// when \a Data has firstprivate variables.
/// \param TDBase LValue of the record described by
/// \a KmpTaskTWithPrivatesQTyRD.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field holds the
/// private copies.
/// \param SharedsTy Type of the shareds record.
/// \param SharedsPtrTy Pointer type used to cast \a KmpTaskSharedsPtr.
/// \param Data Task data; only its list of firstprivate variables is queried
/// here.
/// \param Privates Private copies; expected to be in the same order as the
/// fields of the privates record.
/// \param ForDup If true, only initializers that are non-trivial constructor
/// calls are emitted and all other entries are skipped.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // The shareds record is only needed as a copy source for firstprivates.
  LValue SrcBase;
  if (!Data.FirstprivateVars.empty()) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
      cast<CapturedStmt>(*D.getAssociatedStmt()));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    // For the dup function only non-trivial constructor calls are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks an entry that is initialized from
      // the captured original variable.
      if (auto *Elem = Pair.second.PrivateElemInit) {
        auto *OriginalVD = Pair.second.Original;
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
        // Rebuild the lvalue with the declared alignment of the original
        // variable.
        SharedRefLValue = CGF.MakeAddrLValue(
            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
            SharedRefLValue.getType(), AlignmentSource::Decl);
        QualType Type = OriginalVD->getType();
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Make the helper variable Elem refer to the current source
                  // element while the initializer is emitted.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array case: map Elem to the captured original and emit the
          // initializer directly into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance to the next field even when nothing was emitted, so that fields
    // and Privates entries stay paired.
    ++FI;
  }
}
3582 
3583 /// Check if duplication function is required for taskloops.
3584 static bool checkInitIsRequired(CodeGenFunction &CGF,
3585                                 ArrayRef<PrivateDataTy> Privates) {
3586   bool InitRequired = false;
3587   for (auto &&Pair : Privates) {
3588     auto *VD = Pair.second.PrivateCopy;
3589     auto *Init = VD->getAnyInitializer();
3590     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3591                                     !CGF.isTrivialInitializer(Init));
3592   }
3593   return InitRequired;
3594 }
3595 
3596 
3597 /// Emit task_dup function (for initialization of
3598 /// private/firstprivate/lastprivate vars and last_iter flag)
3599 /// \code
3600 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3601 /// lastpriv) {
3602 /// // setup lastprivate flag
3603 ///    task_dst->last = lastpriv;
3604 /// // could be constructor calls here...
3605 /// }
3606 /// \endcode
3607 static llvm::Value *
3608 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3609                     const OMPExecutableDirective &D,
3610                     QualType KmpTaskTWithPrivatesPtrQTy,
3611                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3612                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3613                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3614                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3615   auto &C = CGM.getContext();
3616   FunctionArgList Args;
3617   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
3618                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3619   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
3620                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3621   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
3622                                 /*Id=*/nullptr, C.IntTy);
3623   Args.push_back(&DstArg);
3624   Args.push_back(&SrcArg);
3625   Args.push_back(&LastprivArg);
3626   auto &TaskDupFnInfo =
3627       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3628   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3629   auto *TaskDup =
3630       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
3631                              ".omp_task_dup.", &CGM.getModule());
3632   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
3633   CodeGenFunction CGF(CGM);
3634   CGF.disableDebugInfo();
3635   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
3636 
3637   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3638       CGF.GetAddrOfLocalVar(&DstArg),
3639       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3640   // task_dst->liter = lastpriv;
3641   if (WithLastIter) {
3642     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3643     LValue Base = CGF.EmitLValueForField(
3644         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3645     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3646     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3647         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3648     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3649   }
3650 
3651   // Emit initial values for private copies (if any).
3652   assert(!Privates.empty());
3653   Address KmpTaskSharedsPtr = Address::invalid();
3654   if (!Data.FirstprivateVars.empty()) {
3655     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3656         CGF.GetAddrOfLocalVar(&SrcArg),
3657         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3658     LValue Base = CGF.EmitLValueForField(
3659         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3660     KmpTaskSharedsPtr = Address(
3661         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3662                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3663                                                   KmpTaskTShareds)),
3664                              Loc),
3665         CGF.getNaturalTypeAlignment(SharedsTy));
3666   }
3667   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3668                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3669   CGF.FinishFunction();
3670   return TaskDup;
3671 }
3672 
3673 /// Checks if destructor function is required to be generated.
3674 /// \return true if cleanups are required, false otherwise.
3675 static bool
3676 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3677   bool NeedsCleanup = false;
3678   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3679   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3680   for (auto *FD : PrivateRD->fields()) {
3681     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3682     if (NeedsCleanup)
3683       break;
3684   }
3685   return NeedsCleanup;
3686 }
3687 
/// Allocate and initialize the task object for a task-based directive.
///
/// The steps performed, in order of emission:
///  - collect the private, firstprivate and lastprivate copies of \a Data and
///    sort them with array_pod_sort_comparator;
///  - build (once) the kmp_task_t record type and the per-task record that
///    also holds the privates;
///  - emit the privates mapping function (or a null pointer when there are no
///    privates) and the proxy task entry function;
///  - emit the call to __kmpc_omp_task_alloc;
///  - copy the shareds and initialize the private copies in the new task;
///  - for taskloop directives, emit the task duplication function when it is
///    needed;
///  - store the destructors function and the priority into the task record
///    when required.
///
/// \param CGF Function in which the allocation/initialization code is
/// emitted.
/// \param Loc Source location used for the emitted runtime calls and helpers.
/// \param D The task-based directive.
/// \param TaskFunction Outlined task function; it is cast to llvm::Function
/// to obtain the type of its fourth parameter.
/// \param SharedsTy Type of the record with the shared variables.
/// \param Shareds Address of the record with the shared variables that is
/// copied into the new task.
/// \param Data Additional task data (privates, final/priority/tied info).
/// \return The allocated task, the proxy task entry, the task pointer cast to
/// the per-task record type, the kmp_task_t lvalue, the kmp_task_t record
/// declaration and, when emitted, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Firstprivates additionally carry the helper variable used by their
  // initializer (PrivateElemInit).
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates). Its
  // expected type is taken from the fourth parameter of the task function.
  llvm::Value *TaskPrivatesMap = nullptr;
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are accumulated in Flags; the 'final' flag
  // may depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // If Data.Final holds a value pointer, the 'final' flag is selected at
  // runtime; otherwise its integer part decides it at compile time.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the allocated task as the per-task record: Base is the whole record
  // (kmp_task_t plus privates), TDBase is its first field, the kmp_task_t.
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a task duplication function when there are lastprivates
    // or when a private copy needs a non-trivial constructor call.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  // Hand the pieces needed by the callers back in the result record.
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
3872 
3873 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
3874                                    const OMPExecutableDirective &D,
3875                                    llvm::Value *TaskFunction,
3876                                    QualType SharedsTy, Address Shareds,
3877                                    const Expr *IfCond,
3878                                    const OMPTaskDataTy &Data) {
3879   if (!CGF.HaveInsertPoint())
3880     return;
3881 
3882   TaskResultTy Result =
3883       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
3884   llvm::Value *NewTask = Result.NewTask;
3885   llvm::Value *TaskEntry = Result.TaskEntry;
3886   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
3887   LValue TDBase = Result.TDBase;
3888   RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
3889   auto &C = CGM.getContext();
3890   // Process list of dependences.
3891   Address DependenciesArray = Address::invalid();
3892   unsigned NumDependencies = Data.Dependences.size();
3893   if (NumDependencies) {
3894     // Dependence kind for RTL.
3895     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
3896     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
3897     RecordDecl *KmpDependInfoRD;
3898     QualType FlagsTy =
3899         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
3900     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
3901     if (KmpDependInfoTy.isNull()) {
3902       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
3903       KmpDependInfoRD->startDefinition();
3904       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
3905       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
3906       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
3907       KmpDependInfoRD->completeDefinition();
3908       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
3909     } else
3910       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
3911     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
3912     // Define type kmp_depend_info[<Dependences.size()>];
3913     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
3914         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
3915         ArrayType::Normal, /*IndexTypeQuals=*/0);
3916     // kmp_depend_info[<Dependences.size()>] deps;
3917     DependenciesArray =
3918         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
3919     for (unsigned i = 0; i < NumDependencies; ++i) {
3920       const Expr *E = Data.Dependences[i].second;
3921       auto Addr = CGF.EmitLValue(E);
3922       llvm::Value *Size;
3923       QualType Ty = E->getType();
3924       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3925         LValue UpAddrLVal =
3926             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3927         llvm::Value *UpAddr =
3928             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3929         llvm::Value *LowIntPtr =
3930             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3931         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3932         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3933       } else
3934         Size = CGF.getTypeSize(Ty);
3935       auto Base = CGF.MakeAddrLValue(
3936           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3937           KmpDependInfoTy);
3938       // deps[i].base_addr = &<Dependences[i].second>;
3939       auto BaseAddrLVal = CGF.EmitLValueForField(
3940           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3941       CGF.EmitStoreOfScalar(
3942           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3943           BaseAddrLVal);
3944       // deps[i].len = sizeof(<Dependences[i].second>);
3945       auto LenLVal = CGF.EmitLValueForField(
3946           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3947       CGF.EmitStoreOfScalar(Size, LenLVal);
3948       // deps[i].flags = <Dependences[i].first>;
3949       RTLDependenceKindTy DepKind;
3950       switch (Data.Dependences[i].first) {
3951       case OMPC_DEPEND_in:
3952         DepKind = DepIn;
3953         break;
3954       // Out and InOut dependencies must use the same code.
3955       case OMPC_DEPEND_out:
3956       case OMPC_DEPEND_inout:
3957         DepKind = DepInOut;
3958         break;
3959       case OMPC_DEPEND_source:
3960       case OMPC_DEPEND_sink:
3961       case OMPC_DEPEND_unknown:
3962         llvm_unreachable("Unknown task dependence type");
3963       }
3964       auto FlagsLVal = CGF.EmitLValueForField(
3965           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3966       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3967                             FlagsLVal);
3968     }
3969     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3970         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3971         CGF.VoidPtrTy);
3972   }
3973 
  // NOTE: routine and part_id fields are initialized by the
  // __kmpc_omp_task_alloc() libcall.
3976   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3977   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3978   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3979   // list is not empty
3980   auto *ThreadID = getThreadID(CGF, Loc);
3981   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3982   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3983   llvm::Value *DepTaskArgs[7];
3984   if (NumDependencies) {
3985     DepTaskArgs[0] = UpLoc;
3986     DepTaskArgs[1] = ThreadID;
3987     DepTaskArgs[2] = NewTask;
3988     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3989     DepTaskArgs[4] = DependenciesArray.getPointer();
3990     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3991     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3992   }
3993   auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
3994                         NumDependencies, &TaskArgs,
3995                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
3996     if (!Data.Tied) {
3997       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3998       auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
3999       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4000     }
4001     if (NumDependencies) {
4002       CGF.EmitRuntimeCall(
4003           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
4004     } else {
4005       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4006                           TaskArgs);
4007     }
4008     // Check if parent region is untied and build return for untied task;
4009     if (auto *Region =
4010             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4011       Region->emitUntiedSwitch(CGF);
4012   };
4013 
4014   llvm::Value *DepWaitTaskArgs[6];
4015   if (NumDependencies) {
4016     DepWaitTaskArgs[0] = UpLoc;
4017     DepWaitTaskArgs[1] = ThreadID;
4018     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4019     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4020     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4021     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4022   }
4023   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4024                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
4025                                                            PrePostActionTy &) {
4026     auto &RT = CGF.CGM.getOpenMPRuntime();
4027     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4028     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4029     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4030     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4031     // is specified.
4032     if (NumDependencies)
4033       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4034                           DepWaitTaskArgs);
4035     // Call proxy_task_entry(gtid, new_task);
4036     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
4037         CodeGenFunction &CGF, PrePostActionTy &Action) {
4038       Action.Enter(CGF);
4039       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4040       CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
4041     };
4042 
4043     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4044     // kmp_task_t *new_task);
4045     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4046     // kmp_task_t *new_task);
4047     RegionCodeGenTy RCG(CodeGen);
4048     CommonActionTy Action(
4049         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4050         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4051     RCG.setAction(Action);
4052     RCG(CGF);
4053   };
4054 
4055   if (IfCond)
4056     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4057   else {
4058     RegionCodeGenTy ThenRCG(ThenCodeGen);
4059     ThenRCG(CGF);
4060   }
4061 }
4062 
4063 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4064                                        const OMPLoopDirective &D,
4065                                        llvm::Value *TaskFunction,
4066                                        QualType SharedsTy, Address Shareds,
4067                                        const Expr *IfCond,
4068                                        const OMPTaskDataTy &Data) {
4069   if (!CGF.HaveInsertPoint())
4070     return;
4071   TaskResultTy Result =
4072       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4073   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
4074   // libcall.
4075   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4076   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4077   // sched, kmp_uint64 grainsize, void *task_dup);
4078   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4079   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4080   llvm::Value *IfVal;
4081   if (IfCond) {
4082     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4083                                       /*isSigned=*/true);
4084   } else
4085     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4086 
4087   LValue LBLVal = CGF.EmitLValueForField(
4088       Result.TDBase,
4089       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4090   auto *LBVar =
4091       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4092   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4093                        /*IsInitializer=*/true);
4094   LValue UBLVal = CGF.EmitLValueForField(
4095       Result.TDBase,
4096       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4097   auto *UBVar =
4098       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4099   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4100                        /*IsInitializer=*/true);
4101   LValue StLVal = CGF.EmitLValueForField(
4102       Result.TDBase,
4103       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4104   auto *StVar =
4105       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4106   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4107                        /*IsInitializer=*/true);
4108   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4109   llvm::Value *TaskArgs[] = {
4110       UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
4111       UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4112       llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
4113       llvm::ConstantInt::getSigned(
4114           CGF.IntTy, Data.Schedule.getPointer()
4115                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4116                          : NoSchedule),
4117       Data.Schedule.getPointer()
4118           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4119                                       /*isSigned=*/false)
4120           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4121       Result.TaskDupFn
4122           ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
4123                                                             CGF.VoidPtrTy)
4124           : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4125   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4126 }
4127 
4128 /// \brief Emit reduction operation for each element of array (required for
4129 /// array sections) LHS op = RHS.
4130 /// \param Type Type of array.
4131 /// \param LHSVar Variable on the left side of the reduction operation
4132 /// (references element of array in original variable).
4133 /// \param RHSVar Variable on the right side of the reduction operation
4134 /// (references element of array in original variable).
4135 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4136 /// RHSVar.
4137 static void EmitOMPAggregateReduction(
4138     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4139     const VarDecl *RHSVar,
4140     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4141                                   const Expr *, const Expr *)> &RedOpGen,
4142     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4143     const Expr *UpExpr = nullptr) {
4144   // Perform element-by-element initialization.
4145   QualType ElementTy;
4146   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4147   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4148 
4149   // Drill down to the base element type on both arrays.
4150   auto ArrayTy = Type->getAsArrayTypeUnsafe();
4151   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4152 
4153   auto RHSBegin = RHSAddr.getPointer();
4154   auto LHSBegin = LHSAddr.getPointer();
4155   // Cast from pointer to array type to pointer to single element.
4156   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4157   // The basic structure here is a while-do loop.
4158   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4159   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4160   auto IsEmpty =
4161       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4162   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4163 
4164   // Enter the loop body, making that address the current address.
4165   auto EntryBB = CGF.Builder.GetInsertBlock();
4166   CGF.EmitBlock(BodyBB);
4167 
4168   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4169 
4170   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4171       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4172   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4173   Address RHSElementCurrent =
4174       Address(RHSElementPHI,
4175               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4176 
4177   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4178       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4179   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4180   Address LHSElementCurrent =
4181       Address(LHSElementPHI,
4182               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4183 
4184   // Emit copy.
4185   CodeGenFunction::OMPPrivateScope Scope(CGF);
4186   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4187   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4188   Scope.Privatize();
4189   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4190   Scope.ForceCleanup();
4191 
4192   // Shift the address forward by one element.
4193   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4194       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4195   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4196       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4197   // Check whether we've reached the end.
4198   auto Done =
4199       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4200   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4201   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4202   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4203 
4204   // Done.
4205   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4206 }
4207 
4208 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4209 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4210 /// UDR combiner function.
4211 static void emitReductionCombiner(CodeGenFunction &CGF,
4212                                   const Expr *ReductionOp) {
4213   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4214     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4215       if (auto *DRE =
4216               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4217         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4218           std::pair<llvm::Function *, llvm::Function *> Reduction =
4219               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4220           RValue Func = RValue::get(Reduction.first);
4221           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4222           CGF.EmitIgnoredExpr(ReductionOp);
4223           return;
4224         }
4225   CGF.EmitIgnoredExpr(ReductionOp);
4226 }
4227 
4228 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
4229                                           llvm::Type *ArgsType,
4230                                           ArrayRef<const Expr *> Privates,
4231                                           ArrayRef<const Expr *> LHSExprs,
4232                                           ArrayRef<const Expr *> RHSExprs,
4233                                           ArrayRef<const Expr *> ReductionOps) {
4234   auto &C = CGM.getContext();
4235 
4236   // void reduction_func(void *LHSArg, void *RHSArg);
4237   FunctionArgList Args;
4238   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4239                            C.VoidPtrTy);
4240   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4241                            C.VoidPtrTy);
4242   Args.push_back(&LHSArg);
4243   Args.push_back(&RHSArg);
4244   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4245   auto *Fn = llvm::Function::Create(
4246       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4247       ".omp.reduction.reduction_func", &CGM.getModule());
4248   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4249   CodeGenFunction CGF(CGM);
4250   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4251 
4252   // Dst = (void*[n])(LHSArg);
4253   // Src = (void*[n])(RHSArg);
4254   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4255       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4256       ArgsType), CGF.getPointerAlign());
4257   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4258       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4259       ArgsType), CGF.getPointerAlign());
4260 
4261   //  ...
4262   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4263   //  ...
4264   CodeGenFunction::OMPPrivateScope Scope(CGF);
4265   auto IPriv = Privates.begin();
4266   unsigned Idx = 0;
4267   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4268     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4269     Scope.addPrivate(RHSVar, [&]() -> Address {
4270       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4271     });
4272     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4273     Scope.addPrivate(LHSVar, [&]() -> Address {
4274       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4275     });
4276     QualType PrivTy = (*IPriv)->getType();
4277     if (PrivTy->isVariablyModifiedType()) {
4278       // Get array size and emit VLA type.
4279       ++Idx;
4280       Address Elem =
4281           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4282       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4283       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4284       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4285       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4286           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4287       CGF.EmitVariablyModifiedType(PrivTy);
4288     }
4289   }
4290   Scope.Privatize();
4291   IPriv = Privates.begin();
4292   auto ILHS = LHSExprs.begin();
4293   auto IRHS = RHSExprs.begin();
4294   for (auto *E : ReductionOps) {
4295     if ((*IPriv)->getType()->isArrayType()) {
4296       // Emit reduction for array section.
4297       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4298       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4299       EmitOMPAggregateReduction(
4300           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4301           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4302             emitReductionCombiner(CGF, E);
4303           });
4304     } else
4305       // Emit reduction for array subscript or single variable.
4306       emitReductionCombiner(CGF, E);
4307     ++IPriv;
4308     ++ILHS;
4309     ++IRHS;
4310   }
4311   Scope.ForceCleanup();
4312   CGF.FinishFunction();
4313   return Fn;
4314 }
4315 
4316 static void emitSingleReductionCombiner(CodeGenFunction &CGF,
4317                                         const Expr *ReductionOp,
4318                                         const Expr *PrivateRef,
4319                                         const DeclRefExpr *LHS,
4320                                         const DeclRefExpr *RHS) {
4321   if (PrivateRef->getType()->isArrayType()) {
4322     // Emit reduction for array section.
4323     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4324     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4325     EmitOMPAggregateReduction(
4326         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4327         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4328           emitReductionCombiner(CGF, ReductionOp);
4329         });
4330   } else
4331     // Emit reduction for array subscript or single variable.
4332     emitReductionCombiner(CGF, ReductionOp);
4333 }
4334 
4335 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4336                                     ArrayRef<const Expr *> Privates,
4337                                     ArrayRef<const Expr *> LHSExprs,
4338                                     ArrayRef<const Expr *> RHSExprs,
4339                                     ArrayRef<const Expr *> ReductionOps,
4340                                     bool WithNowait, bool SimpleReduction) {
4341   if (!CGF.HaveInsertPoint())
4342     return;
4343   // Next code should be emitted for reduction:
4344   //
4345   // static kmp_critical_name lock = { 0 };
4346   //
4347   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4348   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4349   //  ...
4350   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4351   //  *(Type<n>-1*)rhs[<n>-1]);
4352   // }
4353   //
4354   // ...
4355   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4356   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4357   // RedList, reduce_func, &<lock>)) {
4358   // case 1:
4359   //  ...
4360   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4361   //  ...
4362   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4363   // break;
4364   // case 2:
4365   //  ...
4366   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4367   //  ...
4368   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4369   // break;
4370   // default:;
4371   // }
4372   //
4373   // if SimpleReduction is true, only the next code is generated:
4374   //  ...
4375   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4376   //  ...
4377 
4378   auto &C = CGM.getContext();
4379 
4380   if (SimpleReduction) {
4381     CodeGenFunction::RunCleanupsScope Scope(CGF);
4382     auto IPriv = Privates.begin();
4383     auto ILHS = LHSExprs.begin();
4384     auto IRHS = RHSExprs.begin();
4385     for (auto *E : ReductionOps) {
4386       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4387                                   cast<DeclRefExpr>(*IRHS));
4388       ++IPriv;
4389       ++ILHS;
4390       ++IRHS;
4391     }
4392     return;
4393   }
4394 
4395   // 1. Build a list of reduction variables.
4396   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4397   auto Size = RHSExprs.size();
4398   for (auto *E : Privates) {
4399     if (E->getType()->isVariablyModifiedType())
4400       // Reserve place for array size.
4401       ++Size;
4402   }
4403   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4404   QualType ReductionArrayTy =
4405       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
4406                              /*IndexTypeQuals=*/0);
4407   Address ReductionList =
4408       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4409   auto IPriv = Privates.begin();
4410   unsigned Idx = 0;
4411   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4412     Address Elem =
4413       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
4414     CGF.Builder.CreateStore(
4415         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4416             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
4417         Elem);
4418     if ((*IPriv)->getType()->isVariablyModifiedType()) {
4419       // Store array size.
4420       ++Idx;
4421       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
4422                                              CGF.getPointerSize());
4423       llvm::Value *Size = CGF.Builder.CreateIntCast(
4424           CGF.getVLASize(
4425                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
4426               .first,
4427           CGF.SizeTy, /*isSigned=*/false);
4428       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
4429                               Elem);
4430     }
4431   }
4432 
4433   // 2. Emit reduce_func().
4434   auto *ReductionFn = emitReductionFunction(
4435       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
4436       LHSExprs, RHSExprs, ReductionOps);
4437 
4438   // 3. Create static kmp_critical_name lock = { 0 };
4439   auto *Lock = getCriticalRegionLock(".reduction");
4440 
4441   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4442   // RedList, reduce_func, &<lock>);
4443   auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4444   auto *ThreadId = getThreadID(CGF, Loc);
4445   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
4446   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4447       ReductionList.getPointer(), CGF.VoidPtrTy);
4448   llvm::Value *Args[] = {
4449       IdentTLoc,                             // ident_t *<loc>
4450       ThreadId,                              // i32 <gtid>
4451       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
4452       ReductionArrayTySize,                  // size_type sizeof(RedList)
4453       RL,                                    // void *RedList
4454       ReductionFn, // void (*) (void *, void *) <reduce_func>
4455       Lock         // kmp_critical_name *&<lock>
4456   };
4457   auto Res = CGF.EmitRuntimeCall(
4458       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
4459                                        : OMPRTL__kmpc_reduce),
4460       Args);
4461 
4462   // 5. Build switch(res)
4463   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
4464   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
4465 
4466   // 6. Build case 1:
4467   //  ...
4468   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4469   //  ...
4470   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4471   // break;
4472   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
4473   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
4474   CGF.EmitBlock(Case1BB);
4475 
4476   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4477   llvm::Value *EndArgs[] = {
4478       IdentTLoc, // ident_t *<loc>
4479       ThreadId,  // i32 <gtid>
4480       Lock       // kmp_critical_name *&<lock>
4481   };
4482   auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4483       CodeGenFunction &CGF, PrePostActionTy &Action) {
4484     auto IPriv = Privates.begin();
4485     auto ILHS = LHSExprs.begin();
4486     auto IRHS = RHSExprs.begin();
4487     for (auto *E : ReductionOps) {
4488       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4489                                   cast<DeclRefExpr>(*IRHS));
4490       ++IPriv;
4491       ++ILHS;
4492       ++IRHS;
4493     }
4494   };
4495   RegionCodeGenTy RCG(CodeGen);
4496   CommonActionTy Action(
4497       nullptr, llvm::None,
4498       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
4499                                        : OMPRTL__kmpc_end_reduce),
4500       EndArgs);
4501   RCG.setAction(Action);
4502   RCG(CGF);
4503 
4504   CGF.EmitBranch(DefaultBB);
4505 
4506   // 7. Build case 2:
4507   //  ...
4508   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4509   //  ...
4510   // break;
4511   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
4512   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
4513   CGF.EmitBlock(Case2BB);
4514 
4515   auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4516       CodeGenFunction &CGF, PrePostActionTy &Action) {
4517     auto ILHS = LHSExprs.begin();
4518     auto IRHS = RHSExprs.begin();
4519     auto IPriv = Privates.begin();
4520     for (auto *E : ReductionOps) {
4521       const Expr *XExpr = nullptr;
4522       const Expr *EExpr = nullptr;
4523       const Expr *UpExpr = nullptr;
4524       BinaryOperatorKind BO = BO_Comma;
4525       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
4526         if (BO->getOpcode() == BO_Assign) {
4527           XExpr = BO->getLHS();
4528           UpExpr = BO->getRHS();
4529         }
4530       }
4531       // Try to emit update expression as a simple atomic.
4532       auto *RHSExpr = UpExpr;
4533       if (RHSExpr) {
4534         // Analyze RHS part of the whole expression.
4535         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
4536                 RHSExpr->IgnoreParenImpCasts())) {
4537           // If this is a conditional operator, analyze its condition for
4538           // min/max reduction operator.
4539           RHSExpr = ACO->getCond();
4540         }
4541         if (auto *BORHS =
4542                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
4543           EExpr = BORHS->getRHS();
4544           BO = BORHS->getOpcode();
4545         }
4546       }
4547       if (XExpr) {
4548         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4549         auto &&AtomicRedGen = [BO, VD, IPriv,
4550                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
4551                                     const Expr *EExpr, const Expr *UpExpr) {
4552           LValue X = CGF.EmitLValue(XExpr);
4553           RValue E;
4554           if (EExpr)
4555             E = CGF.EmitAnyExpr(EExpr);
4556           CGF.EmitOMPAtomicSimpleUpdateExpr(
4557               X, E, BO, /*IsXLHSInRHSPart=*/true,
4558               llvm::AtomicOrdering::Monotonic, Loc,
4559               [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
4560                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4561                 PrivateScope.addPrivate(
4562                     VD, [&CGF, VD, XRValue, Loc]() -> Address {
4563                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
4564                       CGF.emitOMPSimpleStore(
4565                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
4566                           VD->getType().getNonReferenceType(), Loc);
4567                       return LHSTemp;
4568                     });
4569                 (void)PrivateScope.Privatize();
4570                 return CGF.EmitAnyExpr(UpExpr);
4571               });
4572         };
4573         if ((*IPriv)->getType()->isArrayType()) {
4574           // Emit atomic reduction for array section.
4575           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4576           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
4577                                     AtomicRedGen, XExpr, EExpr, UpExpr);
4578         } else
4579           // Emit atomic reduction for array subscript or single variable.
4580           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
4581       } else {
4582         // Emit as a critical region.
4583         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
4584                                      const Expr *, const Expr *) {
4585           auto &RT = CGF.CGM.getOpenMPRuntime();
4586           RT.emitCriticalRegion(
4587               CGF, ".atomic_reduction",
4588               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
4589                 Action.Enter(CGF);
4590                 emitReductionCombiner(CGF, E);
4591               },
4592               Loc);
4593         };
4594         if ((*IPriv)->getType()->isArrayType()) {
4595           auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4596           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4597           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4598                                     CritRedGen);
4599         } else
4600           CritRedGen(CGF, nullptr, nullptr, nullptr);
4601       }
4602       ++ILHS;
4603       ++IRHS;
4604       ++IPriv;
4605     }
4606   };
4607   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
4608   if (!WithNowait) {
4609     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
4610     llvm::Value *EndArgs[] = {
4611         IdentTLoc, // ident_t *<loc>
4612         ThreadId,  // i32 <gtid>
4613         Lock       // kmp_critical_name *&<lock>
4614     };
4615     CommonActionTy Action(nullptr, llvm::None,
4616                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
4617                           EndArgs);
4618     AtomicRCG.setAction(Action);
4619     AtomicRCG(CGF);
4620   } else
4621     AtomicRCG(CGF);
4622 
4623   CGF.EmitBranch(DefaultBB);
4624   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
4625 }
4626 
4627 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4628                                        SourceLocation Loc) {
4629   if (!CGF.HaveInsertPoint())
4630     return;
4631   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4632   // global_tid);
4633   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4634   // Ignore return result until untied tasks are supported.
4635   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4636   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4637     Region->emitUntiedSwitch(CGF);
4638 }
4639 
4640 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4641                                            OpenMPDirectiveKind InnerKind,
4642                                            const RegionCodeGenTy &CodeGen,
4643                                            bool HasCancel) {
4644   if (!CGF.HaveInsertPoint())
4645     return;
4646   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4647   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4648 }
4649 
namespace {
/// Cancellation kinds. The value selected by getCancellationKind() is passed
/// as the 'cncl_kind' argument of the cancellation runtime calls, so the
/// numeric values are spelled out explicitly.
enum RTCancelKind {
  /// Initial value; not associated with any cancel region.
  CancelNoreq = 0,
  /// Cancel region is 'parallel'.
  CancelParallel = 1,
  /// Cancel region is 'for'.
  CancelLoop = 2,
  /// Cancel region is 'sections'.
  CancelSections = 3,
  /// Cancel region is 'taskgroup'.
  CancelTaskgroup = 4,
};
} // anonymous namespace
4659 
4660 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4661   RTCancelKind CancelKind = CancelNoreq;
4662   if (CancelRegion == OMPD_parallel)
4663     CancelKind = CancelParallel;
4664   else if (CancelRegion == OMPD_for)
4665     CancelKind = CancelLoop;
4666   else if (CancelRegion == OMPD_sections)
4667     CancelKind = CancelSections;
4668   else {
4669     assert(CancelRegion == OMPD_taskgroup);
4670     CancelKind = CancelTaskgroup;
4671   }
4672   return CancelKind;
4673 }
4674 
4675 void CGOpenMPRuntime::emitCancellationPointCall(
4676     CodeGenFunction &CGF, SourceLocation Loc,
4677     OpenMPDirectiveKind CancelRegion) {
4678   if (!CGF.HaveInsertPoint())
4679     return;
4680   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4681   // global_tid, kmp_int32 cncl_kind);
4682   if (auto *OMPRegionInfo =
4683           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4684     if (OMPRegionInfo->hasCancel()) {
4685       llvm::Value *Args[] = {
4686           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4687           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4688       // Ignore return result until untied tasks are supported.
4689       auto *Result = CGF.EmitRuntimeCall(
4690           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4691       // if (__kmpc_cancellationpoint()) {
4692       //  __kmpc_cancel_barrier();
4693       //   exit from construct;
4694       // }
4695       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4696       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4697       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4698       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4699       CGF.EmitBlock(ExitBB);
4700       // __kmpc_cancel_barrier();
4701       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4702       // exit from construct;
4703       auto CancelDest =
4704           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4705       CGF.EmitBranchThroughCleanup(CancelDest);
4706       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4707     }
4708   }
4709 }
4710 
4711 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4712                                      const Expr *IfCond,
4713                                      OpenMPDirectiveKind CancelRegion) {
4714   if (!CGF.HaveInsertPoint())
4715     return;
4716   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4717   // kmp_int32 cncl_kind);
4718   if (auto *OMPRegionInfo =
4719           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4720     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4721                                                         PrePostActionTy &) {
4722       auto &RT = CGF.CGM.getOpenMPRuntime();
4723       llvm::Value *Args[] = {
4724           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4725           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4726       // Ignore return result until untied tasks are supported.
4727       auto *Result = CGF.EmitRuntimeCall(
4728           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4729       // if (__kmpc_cancel()) {
4730       //  __kmpc_cancel_barrier();
4731       //   exit from construct;
4732       // }
4733       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4734       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4735       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4736       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4737       CGF.EmitBlock(ExitBB);
4738       // __kmpc_cancel_barrier();
4739       RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4740       // exit from construct;
4741       auto CancelDest =
4742           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4743       CGF.EmitBranchThroughCleanup(CancelDest);
4744       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4745     };
4746     if (IfCond)
4747       emitOMPIfClause(CGF, IfCond, ThenGen,
4748                       [](CodeGenFunction &, PrePostActionTy &) {});
4749     else {
4750       RegionCodeGenTy ThenRCG(ThenGen);
4751       ThenRCG(CGF);
4752     }
4753   }
4754 }
4755 
4756 /// \brief Obtain information that uniquely identifies a target entry. This
4757 /// consists of the file and device IDs as well as line number associated with
4758 /// the relevant entry source location.
4759 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4760                                      unsigned &DeviceID, unsigned &FileID,
4761                                      unsigned &LineNum) {
4762 
4763   auto &SM = C.getSourceManager();
4764 
4765   // The loc should be always valid and have a file ID (the user cannot use
4766   // #pragma directives in macros)
4767 
4768   assert(Loc.isValid() && "Source location is expected to be always valid.");
4769   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4770 
4771   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4772   assert(PLoc.isValid() && "Source location is expected to be always valid.");
4773 
4774   llvm::sys::fs::UniqueID ID;
4775   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4776     llvm_unreachable("Source file with target region no longer exists!");
4777 
4778   DeviceID = ID.getDevice();
4779   FileID = ID.getFile();
4780   LineNum = PLoc.getLine();
4781 }
4782 
/// Emit the outlined function for the target directive \a D by forwarding
/// all arguments unchanged to emitTargetOutlinedFunctionHelper.
/// \a ParentName must not be empty (checked by assertion).
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
4792 
4793 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4794     const OMPExecutableDirective &D, StringRef ParentName,
4795     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4796     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4797   // Create a unique name for the entry function using the source location
4798   // information of the current target region. The name will be something like:
4799   //
4800   // __omp_offloading_DD_FFFF_PP_lBB
4801   //
4802   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4803   // mangled name of the function that encloses the target region and BB is the
4804   // line number of the target region.
4805 
4806   unsigned DeviceID;
4807   unsigned FileID;
4808   unsigned Line;
4809   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4810                            Line);
4811   SmallString<64> EntryFnName;
4812   {
4813     llvm::raw_svector_ostream OS(EntryFnName);
4814     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4815        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4816   }
4817 
4818   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4819 
4820   CodeGenFunction CGF(CGM, true);
4821   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4822   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4823 
4824   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
4825 
4826   // If this target outline function is not an offload entry, we don't need to
4827   // register it.
4828   if (!IsOffloadEntry)
4829     return;
4830 
4831   // The target region ID is used by the runtime library to identify the current
4832   // target region, so it only has to be unique and not necessarily point to
4833   // anything. It could be the pointer to the outlined function that implements
4834   // the target region, but we aren't using that so that the compiler doesn't
4835   // need to keep that, and could therefore inline the host function if proven
4836   // worthwhile during optimization. In the other hand, if emitting code for the
4837   // device, the ID has to be the function address so that it can retrieved from
4838   // the offloading entry and launched by the runtime library. We also mark the
4839   // outlined function to have external linkage in case we are emitting code for
4840   // the device, because these functions will be entry points to the device.
4841 
4842   if (CGM.getLangOpts().OpenMPIsDevice) {
4843     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4844     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4845   } else
4846     OutlinedFnID = new llvm::GlobalVariable(
4847         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4848         llvm::GlobalValue::PrivateLinkage,
4849         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4850 
4851   // Register the information for the entry associated with this target region.
4852   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4853       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4854 }
4855 
4856 /// discard all CompoundStmts intervening between two constructs
4857 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
4858   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
4859     Body = CS->body_front();
4860 
4861   return Body;
4862 }
4863 
4864 /// \brief Emit the num_teams clause of an enclosed teams directive at the
4865 /// target region scope. If there is no teams directive associated with the
4866 /// target directive, or if there is no num_teams clause associated with the
4867 /// enclosed teams directive, return nullptr.
4868 static llvm::Value *
4869 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4870                                      CodeGenFunction &CGF,
4871                                      const OMPExecutableDirective &D) {
4872 
4873   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4874                                               "teams directive expected to be "
4875                                               "emitted only for the host!");
4876 
4877   // FIXME: For the moment we do not support combined directives with target and
4878   // teams, so we do not expect to get any num_teams clause in the provided
4879   // directive. Once we support that, this assertion can be replaced by the
4880   // actual emission of the clause expression.
4881   assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4882          "Not expecting clause in directive.");
4883 
4884   // If the current target region has a teams region enclosed, we need to get
4885   // the number of teams to pass to the runtime function call. This is done
4886   // by generating the expression in a inlined region. This is required because
4887   // the expression is captured in the enclosing target environment when the
4888   // teams directive is not combined with target.
4889 
4890   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4891 
4892   // FIXME: Accommodate other combined directives with teams when they become
4893   // available.
4894   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4895           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4896     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4897       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4898       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4899       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4900       return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4901                                        /*IsSigned=*/true);
4902     }
4903 
4904     // If we have an enclosed teams directive but no num_teams clause we use
4905     // the default value 0.
4906     return CGF.Builder.getInt32(0);
4907   }
4908 
4909   // No teams associated with the directive.
4910   return nullptr;
4911 }
4912 
4913 /// \brief Emit the thread_limit clause of an enclosed teams directive at the
4914 /// target region scope. If there is no teams directive associated with the
4915 /// target directive, or if there is no thread_limit clause associated with the
4916 /// enclosed teams directive, return nullptr.
4917 static llvm::Value *
4918 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4919                                         CodeGenFunction &CGF,
4920                                         const OMPExecutableDirective &D) {
4921 
4922   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4923                                               "teams directive expected to be "
4924                                               "emitted only for the host!");
4925 
4926   // FIXME: For the moment we do not support combined directives with target and
4927   // teams, so we do not expect to get any thread_limit clause in the provided
4928   // directive. Once we support that, this assertion can be replaced by the
4929   // actual emission of the clause expression.
4930   assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4931          "Not expecting clause in directive.");
4932 
4933   // If the current target region has a teams region enclosed, we need to get
4934   // the thread limit to pass to the runtime function call. This is done
4935   // by generating the expression in a inlined region. This is required because
4936   // the expression is captured in the enclosing target environment when the
4937   // teams directive is not combined with target.
4938 
4939   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4940 
4941   // FIXME: Accommodate other combined directives with teams when they become
4942   // available.
4943   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4944           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4945     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4946       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4947       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4948       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4949       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4950                                        /*IsSigned=*/true);
4951     }
4952 
4953     // If we have an enclosed teams directive but no thread_limit clause we use
4954     // the default value 0.
4955     return CGF.Builder.getInt32(0);
4956   }
4957 
4958   // No teams associated with the directive.
4959   return nullptr;
4960 }
4961 
4962 namespace {
4963 // \brief Utility to handle information from clauses associated with a given
4964 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
4965 // It provides a convenient interface to obtain the information and generate
4966 // code for that information.
4967 class MappableExprsHandler {
4968 public:
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading. Each enumerator occupies a distinct bit, so the values can
  /// be combined with bitwise OR (see getMapTypeBits).
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// \brief The element being mapped is a pointer, therefore the pointee
    /// should be mapped as well.
    OMP_MAP_IS_PTR = 0x10,
    /// \brief This flag signals that an argument is the first one relating to
    /// a map/private clause expression. For some cases a single
    /// map/privatization results in multiple arguments passed to the runtime
    /// library.
    OMP_MAP_FIRST_REF = 0x20,
    /// \brief Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped.
    OMP_MAP_RETURN_PTR = 0x40,
    /// \brief This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE_PTR = 0x80,
    /// \brief Pass the element to the device by value.
    OMP_MAP_PRIVATE_VAL = 0x100,
  };
4999 
5000   /// Class that associates information with a base pointer to be passed to the
5001   /// runtime library.
5002   class BasePointerInfo {
5003     /// The base pointer.
5004     llvm::Value *Ptr = nullptr;
5005     /// The base declaration that refers to this device pointer, or null if
5006     /// there is none.
5007     const ValueDecl *DevPtrDecl = nullptr;
5008 
5009   public:
5010     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
5011         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
5012     llvm::Value *operator*() const { return Ptr; }
5013     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
5014     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
5015   };
5016 
5017   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
5018   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
5019   typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
5020 
5021 private:
  /// \brief Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// \brief Function the directive is being generated for. The helper methods
  /// of this class emit their IR through it.
  CodeGenFunction &CGF;

  /// \brief Set of all first private variables in the current directive.
  /// Consulted by adjustMapModifiersForPrivateClauses when choosing the map
  /// flags of a captured variable.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
5037 
5038   llvm::Value *getExprTypeSize(const Expr *E) const {
5039     auto ExprTy = E->getType().getCanonicalType();
5040 
5041     // Reference types are ignored for mapping purposes.
5042     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
5043       ExprTy = RefTy->getPointeeType().getCanonicalType();
5044 
5045     // Given that an array section is considered a built-in type, we need to
5046     // do the calculation based on the length of the section instead of relying
5047     // on CGF.getTypeSize(E->getType()).
5048     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
5049       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
5050                             OAE->getBase()->IgnoreParenImpCasts())
5051                             .getCanonicalType();
5052 
5053       // If there is no length associated with the expression, that means we
5054       // are using the whole length of the base.
5055       if (!OAE->getLength() && OAE->getColonLoc().isValid())
5056         return CGF.getTypeSize(BaseTy);
5057 
5058       llvm::Value *ElemSize;
5059       if (auto *PTy = BaseTy->getAs<PointerType>())
5060         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
5061       else {
5062         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
5063         assert(ATy && "Expecting array type if not a pointer type.");
5064         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
5065       }
5066 
5067       // If we don't have a length at this point, that is because we have an
5068       // array section with a single element.
5069       if (!OAE->getLength())
5070         return ElemSize;
5071 
5072       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
5073       LengthVal =
5074           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
5075       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
5076     }
5077     return CGF.getTypeSize(ExprTy);
5078   }
5079 
5080   /// \brief Return the corresponding bits for a given map clause modifier. Add
5081   /// a flag marking the map as a pointer if requested. Add a flag marking the
5082   /// map as the first one of a series of maps that relate to the same map
5083   /// expression.
5084   unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
5085                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
5086                           bool AddIsFirstFlag) const {
5087     unsigned Bits = 0u;
5088     switch (MapType) {
5089     case OMPC_MAP_alloc:
5090     case OMPC_MAP_release:
5091       // alloc and release is the default behavior in the runtime library,  i.e.
5092       // if we don't pass any bits alloc/release that is what the runtime is
5093       // going to do. Therefore, we don't need to signal anything for these two
5094       // type modifiers.
5095       break;
5096     case OMPC_MAP_to:
5097       Bits = OMP_MAP_TO;
5098       break;
5099     case OMPC_MAP_from:
5100       Bits = OMP_MAP_FROM;
5101       break;
5102     case OMPC_MAP_tofrom:
5103       Bits = OMP_MAP_TO | OMP_MAP_FROM;
5104       break;
5105     case OMPC_MAP_delete:
5106       Bits = OMP_MAP_DELETE;
5107       break;
5108     default:
5109       llvm_unreachable("Unexpected map type!");
5110       break;
5111     }
5112     if (AddPtrFlag)
5113       Bits |= OMP_MAP_IS_PTR;
5114     if (AddIsFirstFlag)
5115       Bits |= OMP_MAP_FIRST_REF;
5116     if (MapTypeModifier == OMPC_MAP_always)
5117       Bits |= OMP_MAP_ALWAYS;
5118     return Bits;
5119   }
5120 
5121   /// \brief Return true if the provided expression is a final array section. A
5122   /// final array section, is one whose length can't be proved to be one.
5123   bool isFinalArraySectionExpression(const Expr *E) const {
5124     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
5125 
5126     // It is not an array section and therefore not a unity-size one.
5127     if (!OASE)
5128       return false;
5129 
5130     // An array section with no colon always refer to a single element.
5131     if (OASE->getColonLoc().isInvalid())
5132       return false;
5133 
5134     auto *Length = OASE->getLength();
5135 
5136     // If we don't have a length we have to check if the array has size 1
5137     // for this dimension. Also, we should always expect a length if the
5138     // base type is pointer.
5139     if (!Length) {
5140       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
5141                          OASE->getBase()->IgnoreParenImpCasts())
5142                          .getCanonicalType();
5143       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
5144         return ATy->getSize().getSExtValue() != 1;
5145       // If we don't have a constant dimension length, we have to consider
5146       // the current section as having any size, so it is not necessarily
5147       // unitary. If it happen to be unity size, that's user fault.
5148       return true;
5149     }
5150 
5151     // Check if the length evaluates to 1.
5152     llvm::APSInt ConstLength;
5153     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
5154       return true; // Can have more that size 1.
5155 
5156     return ConstLength.getSExtValue() != 1;
5157   }
5158 
  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// One entry is appended to each of \a BasePointers, \a Pointers, \a Sizes
  /// and \a Types for every piece of map information that is generated.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22])
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
    // extra_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;

    // Scan the components from the base to the complete expression. The
    // components are walked in reverse, so the first one visited is the base.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    // Base pointer recorded for the next entry; it is updated as pointers
    // and references are traversed.
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
               .getPointer();

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         Ty->castAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      // Components that match none of the three conditions produce no entry
      // of their own and are skipped.
      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the section pointer (address of the component) and Size is
        // the size in bytes of the storage it designates.
        auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        // If we have a member expression and the current component is a
        // reference, we have to map the reference too. Whenever we have a
        // reference, the section that reference refers to is going to be a
        // load instruction from the storage assigned to the reference.
        if (isa<MemberExpr>(I->getAssociatedExpression()) &&
            I->getAssociatedDeclaration()->getType()->isReferenceType()) {
          auto *LI = cast<llvm::LoadInst>(LB);
          auto *RefAddr = LI->getPointerOperand();

          // Extra entry for the reference itself: pointer-sized and mapped
          // with the 'alloc' map type and no modifier.
          BasePointers.push_back(BP);
          Pointers.push_back(RefAddr);
          Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
          Types.push_back(getMapTypeBits(
              /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
              !IsExpressionFirstInfo, IsCaptureFirstInfo));
          IsExpressionFirstInfo = false;
          IsCaptureFirstInfo = false;
          // The reference will be the next base address.
          BP = RefAddr;
        }

        BasePointers.push_back(BP);
        Pointers.push_back(LB);
        Sizes.push_back(Size);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo,
                                       IsCaptureFirstInfo));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        // Every later entry of this expression is no longer the first one,
        // neither for the expression nor for the capture.
        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        continue;
      }
    }
  }
5407 
5408   /// \brief Return the adjusted map modifiers if the declaration a capture
5409   /// refers to appears in a first-private clause. This is expected to be used
5410   /// only with directives that start with 'target'.
5411   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
5412                                                unsigned CurrentModifiers) {
5413     assert(Cap.capturesVariable() && "Expected capture by reference only!");
5414 
5415     // A first private variable captured by reference will use only the
5416     // 'private ptr' and 'map to' flag. Return the right flags if the captured
5417     // declaration is known as first-private in this handler.
5418     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
5419       return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
5420              MappableExprsHandler::OMP_MAP_TO;
5421 
5422     // We didn't modify anything.
5423     return CurrentModifiers;
5424   }
5425 
5426 public:
5427   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
5428       : CurDir(Dir), CGF(CGF) {
5429     // Extract firstprivate clause information.
5430     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
5431       for (const auto *D : C->varlists())
5432         FirstPrivateDecls.insert(
5433             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
5434     // Extract device pointer clause information.
5435     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
5436       for (auto L : C->component_lists())
5437         DevPointersMap[L.first].push_back(L.second);
5438   }
5439 
  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the mappable expressions found in the 'map', 'to', 'from' and
  /// 'use_device_ptr' clauses of the current directive.
  ///
  /// The four output arrays are cleared first and then filled in lock-step
  /// (entry i of each array describes the same mapping). For each item that
  /// relates with a 'use_device_ptr' clause, the corresponding base pointer
  /// entry is tagged with the relevant declaration and its map type gets the
  /// OMP_MAP_RETURN_PTR flag.
  ///
  /// \param BasePointers [out] Base pointer of each generated mapping.
  /// \param Pointers [out] Section pointer of each generated mapping.
  /// \param Sizes [out] Size of each generated mapping.
  /// \param Types [out] Map type flags of each generated mapping.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // Record kept for each component list gathered from the clauses: the
    // list itself, how it has to be mapped, and whether (and where) a device
    // pointer has to be returned for it.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      ReturnPointerKind ReturnDevicePointer;

      MapInfo()
          : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
            ReturnDevicePointer(RPK_None) {}
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // The key is the canonical declaration, or null when the component list
    // has no associated declaration (the 'this' case, see below).
    llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
    };

    // Gather the component lists of the 'map', 'to' and 'from' clauses. The
    // lists of 'to' and 'from' clauses are recorded with the matching map
    // type and no modifier. None of them requests a device pointer yet.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None);

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            // Base declaration -> RPK_Base; member of 'this' -> RPK_Member,
            // or RPK_MemberReference when the member has reference type.
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section.
        // The loaded value of the expression is used both as base pointer
        // (tagged with the declaration) and as section pointer of that entry.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr =
            this->CGF
                .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
                .getScalarVal();
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
      }

    // Emit the entries for the gathered component lists, one declaration
    // (map key) at a time.
    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        // This is the index of the first entry appended for this list.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
                                           L.Components, BasePointers, Pointers,
                                           Sizes, Types, IsFirstComponentList);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        // Note that this is only done for the first component list of a
        // declaration.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
        }
        IsFirstComponentList = false;
      }
    }
  }
5601 
5602   /// \brief Generate the base pointers, section pointers, sizes and map types
5603   /// associated to a given capture.
5604   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
5605                               llvm::Value *Arg,
5606                               MapBaseValuesArrayTy &BasePointers,
5607                               MapValuesArrayTy &Pointers,
5608                               MapValuesArrayTy &Sizes,
5609                               MapFlagsArrayTy &Types) const {
5610     assert(!Cap->capturesVariableArrayType() &&
5611            "Not expecting to generate map info for a variable array type!");
5612 
5613     BasePointers.clear();
5614     Pointers.clear();
5615     Sizes.clear();
5616     Types.clear();
5617 
5618     // We need to know when we generating information for the first component
5619     // associated with a capture, because the mapping flags depend on it.
5620     bool IsFirstComponentList = true;
5621 
5622     const ValueDecl *VD =
5623         Cap->capturesThis()
5624             ? nullptr
5625             : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
5626 
5627     // If this declaration appears in a is_device_ptr clause we just have to
5628     // pass the pointer by value. If it is a reference to a declaration, we just
5629     // pass its value, otherwise, if it is a member expression, we need to map
5630     // 'to' the field.
5631     if (!VD) {
5632       auto It = DevPointersMap.find(VD);
5633       if (It != DevPointersMap.end()) {
5634         for (auto L : It->second) {
5635           generateInfoForComponentList(
5636               /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
5637               BasePointers, Pointers, Sizes, Types, IsFirstComponentList);
5638           IsFirstComponentList = false;
5639         }
5640         return;
5641       }
5642     } else if (DevPointersMap.count(VD)) {
5643       BasePointers.push_back({Arg, VD});
5644       Pointers.push_back(Arg);
5645       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
5646       Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
5647       return;
5648     }
5649 
5650     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
5651     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
5652       for (auto L : C->decl_component_lists(VD)) {
5653         assert(L.first == VD &&
5654                "We got information for the wrong declaration??");
5655         assert(!L.second.empty() &&
5656                "Not expecting declaration with no component lists.");
5657         generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
5658                                      L.second, BasePointers, Pointers, Sizes,
5659                                      Types, IsFirstComponentList);
5660         IsFirstComponentList = false;
5661       }
5662 
5663     return;
5664   }
5665 
5666   /// \brief Generate the default map information for a given capture \a CI,
5667   /// record field declaration \a RI and captured value \a CV.
5668   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
5669                               const FieldDecl &RI, llvm::Value *CV,
5670                               MapBaseValuesArrayTy &CurBasePointers,
5671                               MapValuesArrayTy &CurPointers,
5672                               MapValuesArrayTy &CurSizes,
5673                               MapFlagsArrayTy &CurMapTypes) {
5674 
5675     // Do the default mapping.
5676     if (CI.capturesThis()) {
5677       CurBasePointers.push_back(CV);
5678       CurPointers.push_back(CV);
5679       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
5680       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
5681       // Default map type.
5682       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
5683     } else if (CI.capturesVariableByCopy()) {
5684       CurBasePointers.push_back(CV);
5685       CurPointers.push_back(CV);
5686       if (!RI.getType()->isAnyPointerType()) {
5687         // We have to signal to the runtime captures passed by value that are
5688         // not pointers.
5689         CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
5690         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
5691       } else {
5692         // Pointers are implicitly mapped with a zero size and no flags
5693         // (other than first map that is added for all implicit maps).
5694         CurMapTypes.push_back(0u);
5695         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
5696       }
5697     } else {
5698       assert(CI.capturesVariable() && "Expected captured reference.");
5699       CurBasePointers.push_back(CV);
5700       CurPointers.push_back(CV);
5701 
5702       const ReferenceType *PtrTy =
5703           cast<ReferenceType>(RI.getType().getTypePtr());
5704       QualType ElementType = PtrTy->getPointeeType();
5705       CurSizes.push_back(CGF.getTypeSize(ElementType));
5706       // The default map type for a scalar/complex type is 'to' because by
5707       // default the value doesn't have to be retrieved. For an aggregate
5708       // type, the default is 'tofrom'.
5709       CurMapTypes.push_back(ElementType->isAggregateType()
5710                                 ? (OMP_MAP_TO | OMP_MAP_FROM)
5711                                 : OMP_MAP_TO);
5712 
5713       // If we have a capture by reference we may need to add the private
5714       // pointer flag if the base declaration shows in some first-private
5715       // clause.
5716       CurMapTypes.back() =
5717           adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
5718     }
5719     // Every default map produces a single argument, so, it is always the
5720     // first one.
5721     CurMapTypes.back() |= OMP_MAP_FIRST_REF;
5722   }
5723 };
5724 
/// \brief Device IDs with a reserved meaning. emitTargetCall passes
/// OMP_DEVICEID_UNDEF as the 32-bit device ID argument when the directive has
/// no device expression.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
5730 } // anonymous namespace
5731 
5732 /// \brief Emit the arrays used to pass the captures and map information to the
5733 /// offloading runtime library. If there is no map or capture information,
5734 /// return nullptr by reference.
5735 static void
5736 emitOffloadingArrays(CodeGenFunction &CGF,
5737                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
5738                      MappableExprsHandler::MapValuesArrayTy &Pointers,
5739                      MappableExprsHandler::MapValuesArrayTy &Sizes,
5740                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
5741                      CGOpenMPRuntime::TargetDataInfo &Info) {
5742   auto &CGM = CGF.CGM;
5743   auto &Ctx = CGF.getContext();
5744 
5745   // Reset the array information.
5746   Info.clearArrayInfo();
5747   Info.NumberOfPtrs = BasePointers.size();
5748 
5749   if (Info.NumberOfPtrs) {
5750     // Detect if we have any capture size requiring runtime evaluation of the
5751     // size so that a constant array could be eventually used.
5752     bool hasRuntimeEvaluationCaptureSize = false;
5753     for (auto *S : Sizes)
5754       if (!isa<llvm::Constant>(S)) {
5755         hasRuntimeEvaluationCaptureSize = true;
5756         break;
5757       }
5758 
5759     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
5760     QualType PointerArrayType =
5761         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
5762                                  /*IndexTypeQuals=*/0);
5763 
5764     Info.BasePointersArray =
5765         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
5766     Info.PointersArray =
5767         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
5768 
5769     // If we don't have any VLA types or other types that require runtime
5770     // evaluation, we can use a constant array for the map sizes, otherwise we
5771     // need to fill up the arrays as we do for the pointers.
5772     if (hasRuntimeEvaluationCaptureSize) {
5773       QualType SizeArrayType = Ctx.getConstantArrayType(
5774           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
5775           /*IndexTypeQuals=*/0);
5776       Info.SizesArray =
5777           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
5778     } else {
5779       // We expect all the sizes to be constant, so we collect them to create
5780       // a constant array.
5781       SmallVector<llvm::Constant *, 16> ConstSizes;
5782       for (auto S : Sizes)
5783         ConstSizes.push_back(cast<llvm::Constant>(S));
5784 
5785       auto *SizesArrayInit = llvm::ConstantArray::get(
5786           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
5787       auto *SizesArrayGbl = new llvm::GlobalVariable(
5788           CGM.getModule(), SizesArrayInit->getType(),
5789           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5790           SizesArrayInit, ".offload_sizes");
5791       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5792       Info.SizesArray = SizesArrayGbl;
5793     }
5794 
5795     // The map types are always constant so we don't need to generate code to
5796     // fill arrays. Instead, we create an array constant.
5797     llvm::Constant *MapTypesArrayInit =
5798         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
5799     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
5800         CGM.getModule(), MapTypesArrayInit->getType(),
5801         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5802         MapTypesArrayInit, ".offload_maptypes");
5803     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5804     Info.MapTypesArray = MapTypesArrayGbl;
5805 
5806     for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
5807       llvm::Value *BPVal = *BasePointers[i];
5808       if (BPVal->getType()->isPointerTy())
5809         BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
5810       else {
5811         assert(BPVal->getType()->isIntegerTy() &&
5812                "If not a pointer, the value type must be an integer.");
5813         BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
5814       }
5815       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
5816           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5817           Info.BasePointersArray, 0, i);
5818       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5819       CGF.Builder.CreateStore(BPVal, BPAddr);
5820 
5821       if (Info.requiresDevicePointerInfo())
5822         if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
5823           Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
5824 
5825       llvm::Value *PVal = Pointers[i];
5826       if (PVal->getType()->isPointerTy())
5827         PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
5828       else {
5829         assert(PVal->getType()->isIntegerTy() &&
5830                "If not a pointer, the value type must be an integer.");
5831         PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
5832       }
5833       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
5834           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5835           Info.PointersArray, 0, i);
5836       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5837       CGF.Builder.CreateStore(PVal, PAddr);
5838 
5839       if (hasRuntimeEvaluationCaptureSize) {
5840         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
5841             llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
5842             Info.SizesArray,
5843             /*Idx0=*/0,
5844             /*Idx1=*/i);
5845         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
5846         CGF.Builder.CreateStore(
5847             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
5848             SAddr);
5849       }
5850     }
5851   }
5852 }
5853 /// \brief Emit the arguments to be passed to the runtime library based on the
5854 /// arrays of pointers, sizes and map types.
5855 static void emitOffloadingArraysArgument(
5856     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
5857     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
5858     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
5859   auto &CGM = CGF.CGM;
5860   if (Info.NumberOfPtrs) {
5861     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5862         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5863         Info.BasePointersArray,
5864         /*Idx0=*/0, /*Idx1=*/0);
5865     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5866         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
5867         Info.PointersArray,
5868         /*Idx0=*/0,
5869         /*Idx1=*/0);
5870     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5871         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
5872         /*Idx0=*/0, /*Idx1=*/0);
5873     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5874         llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
5875         Info.MapTypesArray,
5876         /*Idx0=*/0,
5877         /*Idx1=*/0);
5878   } else {
5879     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5880     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5881     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
5882     MapTypesArrayArg =
5883         llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
5884   }
5885 }
5886 
5887 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
5888                                      const OMPExecutableDirective &D,
5889                                      llvm::Value *OutlinedFn,
5890                                      llvm::Value *OutlinedFnID,
5891                                      const Expr *IfCond, const Expr *Device,
5892                                      ArrayRef<llvm::Value *> CapturedVars) {
5893   if (!CGF.HaveInsertPoint())
5894     return;
5895 
5896   assert(OutlinedFn && "Invalid outlined function!");
5897 
5898   auto &Ctx = CGF.getContext();
5899 
5900   // Fill up the arrays with all the captured variables.
5901   MappableExprsHandler::MapValuesArrayTy KernelArgs;
5902   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
5903   MappableExprsHandler::MapValuesArrayTy Pointers;
5904   MappableExprsHandler::MapValuesArrayTy Sizes;
5905   MappableExprsHandler::MapFlagsArrayTy MapTypes;
5906 
5907   MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
5908   MappableExprsHandler::MapValuesArrayTy CurPointers;
5909   MappableExprsHandler::MapValuesArrayTy CurSizes;
5910   MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
5911 
5912   // Get mappable expression information.
5913   MappableExprsHandler MEHandler(D, CGF);
5914 
5915   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5916   auto RI = CS.getCapturedRecordDecl()->field_begin();
5917   auto CV = CapturedVars.begin();
5918   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
5919                                             CE = CS.capture_end();
5920        CI != CE; ++CI, ++RI, ++CV) {
5921     StringRef Name;
5922     QualType Ty;
5923 
5924     CurBasePointers.clear();
5925     CurPointers.clear();
5926     CurSizes.clear();
5927     CurMapTypes.clear();
5928 
5929     // VLA sizes are passed to the outlined region by copy and do not have map
5930     // information associated.
5931     if (CI->capturesVariableArrayType()) {
5932       CurBasePointers.push_back(*CV);
5933       CurPointers.push_back(*CV);
5934       CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5935       // Copy to the device as an argument. No need to retrieve it.
5936       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
5937                             MappableExprsHandler::OMP_MAP_FIRST_REF);
5938     } else {
5939       // If we have any information in the map clause, we use it, otherwise we
5940       // just do a default mapping.
5941       MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
5942                                        CurSizes, CurMapTypes);
5943       if (CurBasePointers.empty())
5944         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
5945                                          CurPointers, CurSizes, CurMapTypes);
5946     }
5947     // We expect to have at least an element of information for this capture.
5948     assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
5949     assert(CurBasePointers.size() == CurPointers.size() &&
5950            CurBasePointers.size() == CurSizes.size() &&
5951            CurBasePointers.size() == CurMapTypes.size() &&
5952            "Inconsistent map information sizes!");
5953 
5954     // The kernel args are always the first elements of the base pointers
5955     // associated with a capture.
5956     KernelArgs.push_back(*CurBasePointers.front());
5957     // We need to append the results of this capture to what we already have.
5958     BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
5959     Pointers.append(CurPointers.begin(), CurPointers.end());
5960     Sizes.append(CurSizes.begin(), CurSizes.end());
5961     MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
5962   }
5963 
5964   // Keep track on whether the host function has to be executed.
5965   auto OffloadErrorQType =
5966       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
5967   auto OffloadError = CGF.MakeAddrLValue(
5968       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
5969       OffloadErrorQType);
5970   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
5971                         OffloadError);
5972 
5973   // Fill up the pointer arrays and transfer execution to the device.
5974   auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
5975                     OutlinedFnID, OffloadError, OffloadErrorQType,
5976                     &D](CodeGenFunction &CGF, PrePostActionTy &) {
5977     auto &RT = CGF.CGM.getOpenMPRuntime();
5978     // Emit the offloading arrays.
5979     TargetDataInfo Info;
5980     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
5981     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
5982                                  Info.PointersArray, Info.SizesArray,
5983                                  Info.MapTypesArray, Info);
5984 
5985     // On top of the arrays that were filled up, the target offloading call
5986     // takes as arguments the device id as well as the host pointer. The host
5987     // pointer is used by the runtime library to identify the current target
5988     // region, so it only has to be unique and not necessarily point to
5989     // anything. It could be the pointer to the outlined function that
5990     // implements the target region, but we aren't using that so that the
5991     // compiler doesn't need to keep that, and could therefore inline the host
5992     // function if proven worthwhile during optimization.
5993 
5994     // From this point on, we need to have an ID of the target region defined.
5995     assert(OutlinedFnID && "Invalid outlined function ID!");
5996 
5997     // Emit device ID if any.
5998     llvm::Value *DeviceID;
5999     if (Device)
6000       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6001                                            CGF.Int32Ty, /*isSigned=*/true);
6002     else
6003       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6004 
6005     // Emit the number of elements in the offloading arrays.
6006     llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6007 
6008     // Return value of the runtime offloading call.
6009     llvm::Value *Return;
6010 
6011     auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
6012     auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
6013 
6014     // If we have NumTeams defined this means that we have an enclosed teams
6015     // region. Therefore we also expect to have ThreadLimit defined. These two
6016     // values should be defined in the presence of a teams directive, regardless
6017     // of having any clauses associated. If the user is using teams but no
6018     // clauses, these two values will be the default that should be passed to
6019     // the runtime library - a 32-bit integer with the value zero.
6020     if (NumTeams) {
6021       assert(ThreadLimit && "Thread limit expression should be available along "
6022                             "with number of teams.");
6023       llvm::Value *OffloadingArgs[] = {
6024           DeviceID,           OutlinedFnID,
6025           PointerNum,         Info.BasePointersArray,
6026           Info.PointersArray, Info.SizesArray,
6027           Info.MapTypesArray, NumTeams,
6028           ThreadLimit};
6029       Return = CGF.EmitRuntimeCall(
6030           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
6031     } else {
6032       llvm::Value *OffloadingArgs[] = {
6033           DeviceID,           OutlinedFnID,
6034           PointerNum,         Info.BasePointersArray,
6035           Info.PointersArray, Info.SizesArray,
6036           Info.MapTypesArray};
6037       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
6038                                    OffloadingArgs);
6039     }
6040 
6041     CGF.EmitStoreOfScalar(Return, OffloadError);
6042   };
6043 
6044   // Notify that the host version must be executed.
6045   auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
6046     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
6047                           OffloadError);
6048   };
6049 
  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user does
  // not specify target triples.
6054   if (OutlinedFnID) {
6055     if (IfCond)
6056       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6057     else {
6058       RegionCodeGenTy ThenRCG(ThenGen);
6059       ThenRCG(CGF);
6060     }
6061   } else {
6062     RegionCodeGenTy ElseRCG(ElseGen);
6063     ElseRCG(CGF);
6064   }
6065 
6066   // Check the error code and execute the host version if required.
6067   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
6068   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
6069   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
6070   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
6071   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
6072 
6073   CGF.EmitBlock(OffloadFailedBlock);
6074   CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
6075   CGF.EmitBranch(OffloadContBlock);
6076 
6077   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
6078 }
6079 
6080 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
6081                                                     StringRef ParentName) {
6082   if (!S)
6083     return;
6084 
6085   // If we find a OMP target directive, codegen the outline function and
6086   // register the result.
6087   // FIXME: Add other directives with target when they become supported.
6088   bool isTargetDirective = isa<OMPTargetDirective>(S);
6089 
6090   if (isTargetDirective) {
6091     auto *E = cast<OMPExecutableDirective>(S);
6092     unsigned DeviceID;
6093     unsigned FileID;
6094     unsigned Line;
6095     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
6096                              FileID, Line);
6097 
6098     // Is this a target region that should not be emitted as an entry point? If
6099     // so just signal we are done with this target region.
6100     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
6101                                                             ParentName, Line))
6102       return;
6103 
6104     llvm::Function *Fn;
6105     llvm::Constant *Addr;
6106     std::tie(Fn, Addr) =
6107         CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
6108             CGM, cast<OMPTargetDirective>(*E), ParentName,
6109             /*isOffloadEntry=*/true);
6110     assert(Fn && Addr && "Target region emission failed.");
6111     return;
6112   }
6113 
6114   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
6115     if (!E->hasAssociatedStmt())
6116       return;
6117 
6118     scanForTargetRegionsFunctions(
6119         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
6120         ParentName);
6121     return;
6122   }
6123 
6124   // If this is a lambda function, look into its body.
6125   if (auto *L = dyn_cast<LambdaExpr>(S))
6126     S = L->getBody();
6127 
6128   // Keep looking for target regions recursively.
6129   for (auto *II : S->children())
6130     scanForTargetRegionsFunctions(II, ParentName);
6131 }
6132 
6133 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
6134   auto &FD = *cast<FunctionDecl>(GD.getDecl());
6135 
6136   // If emitting code for the host, we do not process FD here. Instead we do
6137   // the normal code generation.
6138   if (!CGM.getLangOpts().OpenMPIsDevice)
6139     return false;
6140 
6141   // Try to detect target regions in the function.
6142   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
6143 
6144   // We should not emit any function other that the ones created during the
6145   // scanning. Therefore, we signal that this function is completely dealt
6146   // with.
6147   return true;
6148 }
6149 
6150 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
6151   if (!CGM.getLangOpts().OpenMPIsDevice)
6152     return false;
6153 
6154   // Check if there are Ctors/Dtors in this declaration and look for target
6155   // regions in it. We use the complete variant to produce the kernel name
6156   // mangling.
6157   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
6158   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
6159     for (auto *Ctor : RD->ctors()) {
6160       StringRef ParentName =
6161           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
6162       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
6163     }
6164     auto *Dtor = RD->getDestructor();
6165     if (Dtor) {
6166       StringRef ParentName =
6167           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
6168       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
6169     }
6170   }
6171 
6172   // If we are in target mode we do not emit any global (declare target is not
6173   // implemented yet). Therefore we signal that GD was processed in this case.
6174   return true;
6175 }
6176 
6177 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
6178   auto *VD = GD.getDecl();
6179   if (isa<FunctionDecl>(VD))
6180     return emitTargetFunctions(GD);
6181 
6182   return emitTargetGlobalVariable(GD);
6183 }
6184 
6185 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
6186   // If we have offloading in the current module, we need to emit the entries
6187   // now and register the offloading descriptor.
6188   createOffloadEntriesAndInfoMetadata();
6189 
6190   // Create and register the offloading binary descriptors. This is the main
6191   // entity that captures all the information about offloading in the current
6192   // compilation unit.
6193   return createOffloadingBinaryDescriptorRegistration();
6194 }
6195 
6196 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
6197                                     const OMPExecutableDirective &D,
6198                                     SourceLocation Loc,
6199                                     llvm::Value *OutlinedFn,
6200                                     ArrayRef<llvm::Value *> CapturedVars) {
6201   if (!CGF.HaveInsertPoint())
6202     return;
6203 
6204   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6205   CodeGenFunction::RunCleanupsScope Scope(CGF);
6206 
6207   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
6208   llvm::Value *Args[] = {
6209       RTLoc,
6210       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
6211       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
6212   llvm::SmallVector<llvm::Value *, 16> RealArgs;
6213   RealArgs.append(std::begin(Args), std::end(Args));
6214   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
6215 
6216   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
6217   CGF.EmitRuntimeCall(RTLFn, RealArgs);
6218 }
6219 
6220 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
6221                                          const Expr *NumTeams,
6222                                          const Expr *ThreadLimit,
6223                                          SourceLocation Loc) {
6224   if (!CGF.HaveInsertPoint())
6225     return;
6226 
6227   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6228 
6229   llvm::Value *NumTeamsVal =
6230       (NumTeams)
6231           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
6232                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6233           : CGF.Builder.getInt32(0);
6234 
6235   llvm::Value *ThreadLimitVal =
6236       (ThreadLimit)
6237           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
6238                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6239           : CGF.Builder.getInt32(0);
6240 
6241   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
6242   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
6243                                      ThreadLimitVal};
6244   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
6245                       PushNumTeamsArgs);
6246 }
6247 
6248 void CGOpenMPRuntime::emitTargetDataCalls(
6249     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6250     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
6251   if (!CGF.HaveInsertPoint())
6252     return;
6253 
6254   // Action used to replace the default codegen action and turn privatization
6255   // off.
6256   PrePostActionTy NoPrivAction;
6257 
6258   // Generate the code for the opening of the data environment. Capture all the
6259   // arguments of the runtime call by reference because they are used in the
6260   // closing of the region.
6261   auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
6262       CodeGenFunction &CGF, PrePostActionTy &) {
6263     // Fill up the arrays with all the mapped variables.
6264     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6265     MappableExprsHandler::MapValuesArrayTy Pointers;
6266     MappableExprsHandler::MapValuesArrayTy Sizes;
6267     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6268 
6269     // Get map clause information.
6270     MappableExprsHandler MCHandler(D, CGF);
6271     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6272 
6273     // Fill up the arrays and create the arguments.
6274     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6275 
6276     llvm::Value *BasePointersArrayArg = nullptr;
6277     llvm::Value *PointersArrayArg = nullptr;
6278     llvm::Value *SizesArrayArg = nullptr;
6279     llvm::Value *MapTypesArrayArg = nullptr;
6280     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6281                                  SizesArrayArg, MapTypesArrayArg, Info);
6282 
6283     // Emit device ID if any.
6284     llvm::Value *DeviceID = nullptr;
6285     if (Device)
6286       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6287                                            CGF.Int32Ty, /*isSigned=*/true);
6288     else
6289       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6290 
6291     // Emit the number of elements in the offloading arrays.
6292     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
6293 
6294     llvm::Value *OffloadingArgs[] = {
6295         DeviceID,         PointerNum,    BasePointersArrayArg,
6296         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6297     auto &RT = CGF.CGM.getOpenMPRuntime();
6298     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
6299                         OffloadingArgs);
6300 
6301     // If device pointer privatization is required, emit the body of the region
6302     // here. It will have to be duplicated: with and without privatization.
6303     if (!Info.CaptureDeviceAddrMap.empty())
6304       CodeGen(CGF);
6305   };
6306 
6307   // Generate code for the closing of the data region.
6308   auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
6309                                             PrePostActionTy &) {
6310     assert(Info.isValid() && "Invalid data environment closing arguments.");
6311 
6312     llvm::Value *BasePointersArrayArg = nullptr;
6313     llvm::Value *PointersArrayArg = nullptr;
6314     llvm::Value *SizesArrayArg = nullptr;
6315     llvm::Value *MapTypesArrayArg = nullptr;
6316     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6317                                  SizesArrayArg, MapTypesArrayArg, Info);
6318 
6319     // Emit device ID if any.
6320     llvm::Value *DeviceID = nullptr;
6321     if (Device)
6322       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6323                                            CGF.Int32Ty, /*isSigned=*/true);
6324     else
6325       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6326 
6327     // Emit the number of elements in the offloading arrays.
6328     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
6329 
6330     llvm::Value *OffloadingArgs[] = {
6331         DeviceID,         PointerNum,    BasePointersArrayArg,
6332         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6333     auto &RT = CGF.CGM.getOpenMPRuntime();
6334     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
6335                         OffloadingArgs);
6336   };
6337 
6338   // If we need device pointer privatization, we need to emit the body of the
6339   // region with no privatization in the 'else' branch of the conditional.
6340   // Otherwise, we don't have to do anything.
6341   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
6342                                                          PrePostActionTy &) {
6343     if (!Info.CaptureDeviceAddrMap.empty()) {
6344       CodeGen.setAction(NoPrivAction);
6345       CodeGen(CGF);
6346     }
6347   };
6348 
6349   // We don't have to do anything to close the region if the if clause evaluates
6350   // to false.
6351   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6352 
6353   if (IfCond) {
6354     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
6355   } else {
6356     RegionCodeGenTy RCG(BeginThenGen);
6357     RCG(CGF);
6358   }
6359 
6360   // If we don't require privatization of device pointers, we emit the body in
6361   // between the runtime calls. This avoids duplicating the body code.
6362   if (Info.CaptureDeviceAddrMap.empty()) {
6363     CodeGen.setAction(NoPrivAction);
6364     CodeGen(CGF);
6365   }
6366 
6367   if (IfCond) {
6368     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
6369   } else {
6370     RegionCodeGenTy RCG(EndThenGen);
6371     RCG(CGF);
6372   }
6373 }
6374 
6375 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
6376     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6377     const Expr *Device) {
6378   if (!CGF.HaveInsertPoint())
6379     return;
6380 
6381   assert((isa<OMPTargetEnterDataDirective>(D) ||
6382           isa<OMPTargetExitDataDirective>(D) ||
6383           isa<OMPTargetUpdateDirective>(D)) &&
6384          "Expecting either target enter, exit data, or update directives.");
6385 
6386   // Generate the code for the opening of the data environment.
6387   auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
6388     // Fill up the arrays with all the mapped variables.
6389     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6390     MappableExprsHandler::MapValuesArrayTy Pointers;
6391     MappableExprsHandler::MapValuesArrayTy Sizes;
6392     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6393 
6394     // Get map clause information.
6395     MappableExprsHandler MEHandler(D, CGF);
6396     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6397 
6398     // Fill up the arrays and create the arguments.
6399     TargetDataInfo Info;
6400     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6401     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
6402                                  Info.PointersArray, Info.SizesArray,
6403                                  Info.MapTypesArray, Info);
6404 
6405     // Emit device ID if any.
6406     llvm::Value *DeviceID = nullptr;
6407     if (Device)
6408       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6409                                            CGF.Int32Ty, /*isSigned=*/true);
6410     else
6411       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6412 
6413     // Emit the number of elements in the offloading arrays.
6414     auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6415 
6416     llvm::Value *OffloadingArgs[] = {
6417         DeviceID,           PointerNum,      Info.BasePointersArray,
6418         Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
6419 
6420     auto &RT = CGF.CGM.getOpenMPRuntime();
6421     // Select the right runtime function call for each expected standalone
6422     // directive.
6423     OpenMPRTLFunction RTLFn;
6424     switch (D.getDirectiveKind()) {
6425     default:
6426       llvm_unreachable("Unexpected standalone target data directive.");
6427       break;
6428     case OMPD_target_enter_data:
6429       RTLFn = OMPRTL__tgt_target_data_begin;
6430       break;
6431     case OMPD_target_exit_data:
6432       RTLFn = OMPRTL__tgt_target_data_end;
6433       break;
6434     case OMPD_target_update:
6435       RTLFn = OMPRTL__tgt_target_data_update;
6436       break;
6437     }
6438     CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
6439   };
6440 
6441   // In the event we get an if clause, we don't have to take any action on the
6442   // else side.
6443   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6444 
6445   if (IfCond) {
6446     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6447   } else {
6448     RegionCodeGenTy ThenGenRCG(ThenGen);
6449     ThenGenRCG(CGF);
6450   }
6451 }
6452 
6453 namespace {
6454   /// Kind of parameter in a function with 'declare simd' directive.
6455   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
6456   /// Attribute set of the parameter.
6457   struct ParamAttrTy {
6458     ParamKindTy Kind = Vector;
6459     llvm::APSInt StrideOrArg;
6460     llvm::APSInt Alignment;
6461   };
6462 } // namespace
6463 
6464 static unsigned evaluateCDTSize(const FunctionDecl *FD,
6465                                 ArrayRef<ParamAttrTy> ParamAttrs) {
6466   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
6467   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
6468   // of that clause. The VLEN value must be power of 2.
6469   // In other case the notion of the function`s "characteristic data type" (CDT)
6470   // is used to compute the vector length.
6471   // CDT is defined in the following order:
6472   //   a) For non-void function, the CDT is the return type.
6473   //   b) If the function has any non-uniform, non-linear parameters, then the
6474   //   CDT is the type of the first such parameter.
6475   //   c) If the CDT determined by a) or b) above is struct, union, or class
6476   //   type which is pass-by-value (except for the type that maps to the
6477   //   built-in complex data type), the characteristic data type is int.
6478   //   d) If none of the above three cases is applicable, the CDT is int.
6479   // The VLEN is then determined based on the CDT and the size of vector
6480   // register of that ISA for which current vector version is generated. The
6481   // VLEN is computed using the formula below:
6482   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
6483   // where vector register size specified in section 3.2.1 Registers and the
6484   // Stack Frame of original AMD64 ABI document.
6485   QualType RetType = FD->getReturnType();
6486   if (RetType.isNull())
6487     return 0;
6488   ASTContext &C = FD->getASTContext();
6489   QualType CDT;
6490   if (!RetType.isNull() && !RetType->isVoidType())
6491     CDT = RetType;
6492   else {
6493     unsigned Offset = 0;
6494     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
6495       if (ParamAttrs[Offset].Kind == Vector)
6496         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
6497       ++Offset;
6498     }
6499     if (CDT.isNull()) {
6500       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
6501         if (ParamAttrs[I + Offset].Kind == Vector) {
6502           CDT = FD->getParamDecl(I)->getType();
6503           break;
6504         }
6505       }
6506     }
6507   }
6508   if (CDT.isNull())
6509     CDT = C.IntTy;
6510   CDT = CDT->getCanonicalTypeUnqualified();
6511   if (CDT->isRecordType() || CDT->isUnionType())
6512     CDT = C.IntTy;
6513   return C.getTypeSize(CDT);
6514 }
6515 
6516 static void
6517 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
6518                            const llvm::APSInt &VLENVal,
6519                            ArrayRef<ParamAttrTy> ParamAttrs,
6520                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
6521   struct ISADataTy {
6522     char ISA;
6523     unsigned VecRegSize;
6524   };
6525   ISADataTy ISAData[] = {
6526       {
6527           'b', 128
6528       }, // SSE
6529       {
6530           'c', 256
6531       }, // AVX
6532       {
6533           'd', 256
6534       }, // AVX2
6535       {
6536           'e', 512
6537       }, // AVX512
6538   };
6539   llvm::SmallVector<char, 2> Masked;
6540   switch (State) {
6541   case OMPDeclareSimdDeclAttr::BS_Undefined:
6542     Masked.push_back('N');
6543     Masked.push_back('M');
6544     break;
6545   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
6546     Masked.push_back('N');
6547     break;
6548   case OMPDeclareSimdDeclAttr::BS_Inbranch:
6549     Masked.push_back('M');
6550     break;
6551   }
6552   for (auto Mask : Masked) {
6553     for (auto &Data : ISAData) {
6554       SmallString<256> Buffer;
6555       llvm::raw_svector_ostream Out(Buffer);
6556       Out << "_ZGV" << Data.ISA << Mask;
6557       if (!VLENVal) {
6558         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
6559                                          evaluateCDTSize(FD, ParamAttrs));
6560       } else
6561         Out << VLENVal;
6562       for (auto &ParamAttr : ParamAttrs) {
6563         switch (ParamAttr.Kind){
6564         case LinearWithVarStride:
6565           Out << 's' << ParamAttr.StrideOrArg;
6566           break;
6567         case Linear:
6568           Out << 'l';
6569           if (!!ParamAttr.StrideOrArg)
6570             Out << ParamAttr.StrideOrArg;
6571           break;
6572         case Uniform:
6573           Out << 'u';
6574           break;
6575         case Vector:
6576           Out << 'v';
6577           break;
6578         }
6579         if (!!ParamAttr.Alignment)
6580           Out << 'a' << ParamAttr.Alignment;
6581       }
6582       Out << '_' << Fn->getName();
6583       Fn->addFnAttr(Out.str());
6584     }
6585   }
6586 }
6587 
6588 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
6589                                               llvm::Function *Fn) {
6590   ASTContext &C = CGM.getContext();
6591   FD = FD->getCanonicalDecl();
6592   // Map params to their positions in function decl.
6593   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
6594   if (isa<CXXMethodDecl>(FD))
6595     ParamPositions.insert({FD, 0});
6596   unsigned ParamPos = ParamPositions.size();
6597   for (auto *P : FD->parameters()) {
6598     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
6599     ++ParamPos;
6600   }
6601   for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
6602     llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
6603     // Mark uniform parameters.
6604     for (auto *E : Attr->uniforms()) {
6605       E = E->IgnoreParenImpCasts();
6606       unsigned Pos;
6607       if (isa<CXXThisExpr>(E))
6608         Pos = ParamPositions[FD];
6609       else {
6610         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6611                         ->getCanonicalDecl();
6612         Pos = ParamPositions[PVD];
6613       }
6614       ParamAttrs[Pos].Kind = Uniform;
6615     }
6616     // Get alignment info.
6617     auto NI = Attr->alignments_begin();
6618     for (auto *E : Attr->aligneds()) {
6619       E = E->IgnoreParenImpCasts();
6620       unsigned Pos;
6621       QualType ParmTy;
6622       if (isa<CXXThisExpr>(E)) {
6623         Pos = ParamPositions[FD];
6624         ParmTy = E->getType();
6625       } else {
6626         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6627                         ->getCanonicalDecl();
6628         Pos = ParamPositions[PVD];
6629         ParmTy = PVD->getType();
6630       }
6631       ParamAttrs[Pos].Alignment =
6632           (*NI) ? (*NI)->EvaluateKnownConstInt(C)
6633                 : llvm::APSInt::getUnsigned(
6634                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
6635                           .getQuantity());
6636       ++NI;
6637     }
6638     // Mark linear parameters.
6639     auto SI = Attr->steps_begin();
6640     auto MI = Attr->modifiers_begin();
6641     for (auto *E : Attr->linears()) {
6642       E = E->IgnoreParenImpCasts();
6643       unsigned Pos;
6644       if (isa<CXXThisExpr>(E))
6645         Pos = ParamPositions[FD];
6646       else {
6647         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6648                         ->getCanonicalDecl();
6649         Pos = ParamPositions[PVD];
6650       }
6651       auto &ParamAttr = ParamAttrs[Pos];
6652       ParamAttr.Kind = Linear;
6653       if (*SI) {
6654         if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
6655                                   Expr::SE_AllowSideEffects)) {
6656           if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
6657             if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
6658               ParamAttr.Kind = LinearWithVarStride;
6659               ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
6660                   ParamPositions[StridePVD->getCanonicalDecl()]);
6661             }
6662           }
6663         }
6664       }
6665       ++SI;
6666       ++MI;
6667     }
6668     llvm::APSInt VLENVal;
6669     if (const Expr *VLEN = Attr->getSimdlen())
6670       VLENVal = VLEN->EvaluateKnownConstInt(C);
6671     OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
6672     if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
6673         CGM.getTriple().getArch() == llvm::Triple::x86_64)
6674       emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
6675   }
6676 }
6677 
6678 namespace {
6679 /// Cleanup action for doacross support.
6680 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
6681 public:
6682   static const int DoacrossFinArgs = 2;
6683 
6684 private:
6685   llvm::Value *RTLFn;
6686   llvm::Value *Args[DoacrossFinArgs];
6687 
6688 public:
6689   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
6690       : RTLFn(RTLFn) {
6691     assert(CallArgs.size() == DoacrossFinArgs);
6692     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
6693   }
6694   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
6695     if (!CGF.HaveInsertPoint())
6696       return;
6697     CGF.EmitRuntimeCall(RTLFn, Args);
6698   }
6699 };
6700 } // namespace
6701 
6702 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
6703                                        const OMPLoopDirective &D) {
6704   if (!CGF.HaveInsertPoint())
6705     return;
6706 
6707   ASTContext &C = CGM.getContext();
6708   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
6709   RecordDecl *RD;
6710   if (KmpDimTy.isNull()) {
6711     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
6712     //  kmp_int64 lo; // lower
6713     //  kmp_int64 up; // upper
6714     //  kmp_int64 st; // stride
6715     // };
6716     RD = C.buildImplicitRecord("kmp_dim");
6717     RD->startDefinition();
6718     addFieldToRecordDecl(C, RD, Int64Ty);
6719     addFieldToRecordDecl(C, RD, Int64Ty);
6720     addFieldToRecordDecl(C, RD, Int64Ty);
6721     RD->completeDefinition();
6722     KmpDimTy = C.getRecordType(RD);
6723   } else
6724     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
6725 
6726   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
6727   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
6728   enum { LowerFD = 0, UpperFD, StrideFD };
6729   // Fill dims with data.
6730   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
6731   // dims.upper = num_iterations;
6732   LValue UpperLVal =
6733       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
6734   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
6735       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
6736       Int64Ty, D.getNumIterations()->getExprLoc());
6737   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
6738   // dims.stride = 1;
6739   LValue StrideLVal =
6740       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
6741   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
6742                         StrideLVal);
6743 
6744   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
6745   // kmp_int32 num_dims, struct kmp_dim * dims);
6746   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
6747                          getThreadID(CGF, D.getLocStart()),
6748                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
6749                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6750                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
6751 
6752   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
6753   CGF.EmitRuntimeCall(RTLFn, Args);
6754   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
6755       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
6756   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
6757   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
6758                                              llvm::makeArrayRef(FiniArgs));
6759 }
6760 
6761 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
6762                                           const OMPDependClause *C) {
6763   QualType Int64Ty =
6764       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
6765   const Expr *CounterVal = C->getCounterValue();
6766   assert(CounterVal);
6767   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
6768                                                  CounterVal->getType(), Int64Ty,
6769                                                  CounterVal->getExprLoc());
6770   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
6771   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
6772   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
6773                          getThreadID(CGF, C->getLocStart()),
6774                          CntAddr.getPointer()};
6775   llvm::Value *RTLFn;
6776   if (C->getDependencyKind() == OMPC_DEPEND_source)
6777     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
6778   else {
6779     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
6780     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
6781   }
6782   CGF.EmitRuntimeCall(RTLFn, Args);
6783 }
6784 
6785