1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 
30 using namespace clang;
31 using namespace CodeGen;
32 
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50 
51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57 
58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63 
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67 
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70 
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74 
75   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
76 
77   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
78 
79   bool hasCancel() const { return HasCancel; }
80 
81   static bool classof(const CGCapturedStmtInfo *Info) {
82     return Info->getKind() == CR_OpenMP;
83   }
84 
85 protected:
86   CGOpenMPRegionKind RegionKind;
87   RegionCodeGenTy CodeGen;
88   OpenMPDirectiveKind Kind;
89   bool HasCancel;
90 };
91 
92 /// \brief API for captured statement code generation in OpenMP constructs.
93 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
94 public:
95   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
96                              const RegionCodeGenTy &CodeGen,
97                              OpenMPDirectiveKind Kind, bool HasCancel)
98       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
99                            HasCancel),
100         ThreadIDVar(ThreadIDVar) {
101     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
102   }
103 
104   /// \brief Get a variable or parameter for storing global thread id
105   /// inside OpenMP construct.
106   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
107 
108   /// \brief Get the name of the capture helper.
109   StringRef getHelperName() const override { return ".omp_outlined."; }
110 
111   static bool classof(const CGCapturedStmtInfo *Info) {
112     return CGOpenMPRegionInfo::classof(Info) &&
113            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
114                ParallelOutlinedRegion;
115   }
116 
117 private:
118   /// \brief A variable or parameter storing global thread id for OpenMP
119   /// constructs.
120   const VarDecl *ThreadIDVar;
121 };
122 
123 /// \brief API for captured statement code generation in OpenMP constructs.
124 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
125 public:
126   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
127                                  const VarDecl *ThreadIDVar,
128                                  const RegionCodeGenTy &CodeGen,
129                                  OpenMPDirectiveKind Kind, bool HasCancel)
130       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
131         ThreadIDVar(ThreadIDVar) {
132     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
133   }
134 
135   /// \brief Get a variable or parameter for storing global thread id
136   /// inside OpenMP construct.
137   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
138 
139   /// \brief Get an LValue for the current ThreadID variable.
140   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
141 
142   /// \brief Get the name of the capture helper.
143   StringRef getHelperName() const override { return ".omp_outlined."; }
144 
145   static bool classof(const CGCapturedStmtInfo *Info) {
146     return CGOpenMPRegionInfo::classof(Info) &&
147            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
148                TaskOutlinedRegion;
149   }
150 
151 private:
152   /// \brief A variable or parameter storing global thread id for OpenMP
153   /// constructs.
154   const VarDecl *ThreadIDVar;
155 };
156 
157 /// \brief API for inlined captured statement code generation in OpenMP
158 /// constructs.
159 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
160 public:
161   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
162                             const RegionCodeGenTy &CodeGen,
163                             OpenMPDirectiveKind Kind, bool HasCancel)
164       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
165         OldCSI(OldCSI),
166         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
167 
168   // \brief Retrieve the value of the context parameter.
169   llvm::Value *getContextValue() const override {
170     if (OuterRegionInfo)
171       return OuterRegionInfo->getContextValue();
172     llvm_unreachable("No context value for inlined OpenMP region");
173   }
174 
175   void setContextValue(llvm::Value *V) override {
176     if (OuterRegionInfo) {
177       OuterRegionInfo->setContextValue(V);
178       return;
179     }
180     llvm_unreachable("No context value for inlined OpenMP region");
181   }
182 
183   /// \brief Lookup the captured field decl for a variable.
184   const FieldDecl *lookup(const VarDecl *VD) const override {
185     if (OuterRegionInfo)
186       return OuterRegionInfo->lookup(VD);
187     // If there is no outer outlined region,no need to lookup in a list of
188     // captured variables, we can use the original one.
189     return nullptr;
190   }
191 
192   FieldDecl *getThisFieldDecl() const override {
193     if (OuterRegionInfo)
194       return OuterRegionInfo->getThisFieldDecl();
195     return nullptr;
196   }
197 
198   /// \brief Get a variable or parameter for storing global thread id
199   /// inside OpenMP construct.
200   const VarDecl *getThreadIDVariable() const override {
201     if (OuterRegionInfo)
202       return OuterRegionInfo->getThreadIDVariable();
203     return nullptr;
204   }
205 
206   /// \brief Get the name of the capture helper.
207   StringRef getHelperName() const override {
208     if (auto *OuterRegionInfo = getOldCSI())
209       return OuterRegionInfo->getHelperName();
210     llvm_unreachable("No helper name for inlined OpenMP construct");
211   }
212 
213   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
214 
215   static bool classof(const CGCapturedStmtInfo *Info) {
216     return CGOpenMPRegionInfo::classof(Info) &&
217            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
218   }
219 
220 private:
221   /// \brief CodeGen info about outer OpenMP region.
222   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
223   CGOpenMPRegionInfo *OuterRegionInfo;
224 };
225 
226 /// \brief API for captured statement code generation in OpenMP target
227 /// constructs. For this captures, implicit parameters are used instead of the
228 /// captured fields. The name of the target region has to be unique in a given
229 /// application so it is provided by the client, because only the client has
230 /// the information to generate that.
231 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
232 public:
233   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
234                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
235       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
236                            /*HasCancel=*/false),
237         HelperName(HelperName) {}
238 
239   /// \brief This is unused for target regions because each starts executing
240   /// with a single thread.
241   const VarDecl *getThreadIDVariable() const override { return nullptr; }
242 
243   /// \brief Get the name of the capture helper.
244   StringRef getHelperName() const override { return HelperName; }
245 
246   static bool classof(const CGCapturedStmtInfo *Info) {
247     return CGOpenMPRegionInfo::classof(Info) &&
248            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
249   }
250 
251 private:
252   StringRef HelperName;
253 };
254 
255 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
256   llvm_unreachable("No codegen for expressions");
257 }
258 /// \brief API for generation of expressions captured in a innermost OpenMP
259 /// region.
260 class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
261 public:
262   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
263       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
264                                   OMPD_unknown,
265                                   /*HasCancel=*/false),
266         PrivScope(CGF) {
267     // Make sure the globals captured in the provided statement are local by
268     // using the privatization logic. We assume the same variable is not
269     // captured more than once.
270     for (auto &C : CS.captures()) {
271       if (!C.capturesVariable() && !C.capturesVariableByCopy())
272         continue;
273 
274       const VarDecl *VD = C.getCapturedVar();
275       if (VD->isLocalVarDeclOrParm())
276         continue;
277 
278       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
279                       /*RefersToEnclosingVariableOrCapture=*/false,
280                       VD->getType().getNonReferenceType(), VK_LValue,
281                       SourceLocation());
282       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
283         return CGF.EmitLValue(&DRE).getAddress();
284       });
285     }
286     (void)PrivScope.Privatize();
287   }
288 
289   /// \brief Lookup the captured field decl for a variable.
290   const FieldDecl *lookup(const VarDecl *VD) const override {
291     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
292       return FD;
293     return nullptr;
294   }
295 
296   /// \brief Emit the captured statement body.
297   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
298     llvm_unreachable("No body for expressions");
299   }
300 
301   /// \brief Get a variable or parameter for storing global thread id
302   /// inside OpenMP construct.
303   const VarDecl *getThreadIDVariable() const override {
304     llvm_unreachable("No thread id for expressions");
305   }
306 
307   /// \brief Get the name of the capture helper.
308   StringRef getHelperName() const override {
309     llvm_unreachable("No helper name for expressions");
310   }
311 
312   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
313 
314 private:
315   /// Private scope to capture global variables.
316   CodeGenFunction::OMPPrivateScope PrivScope;
317 };
318 
319 /// \brief RAII for emitting code of OpenMP constructs.
320 class InlinedOpenMPRegionRAII {
321   CodeGenFunction &CGF;
322 
323 public:
324   /// \brief Constructs region for combined constructs.
325   /// \param CodeGen Code generation sequence for combined directives. Includes
326   /// a list of functions used for code generation of implicitly inlined
327   /// regions.
328   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
329                           OpenMPDirectiveKind Kind, bool HasCancel)
330       : CGF(CGF) {
331     // Start emission for the construct.
332     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
333         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
334   }
335 
336   ~InlinedOpenMPRegionRAII() {
337     // Restore original CapturedStmtInfo only if we're done with code emission.
338     auto *OldCSI =
339         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
340     delete CGF.CapturedStmtInfo;
341     CGF.CapturedStmtInfo = OldCSI;
342   }
343 };
344 
345 /// \brief Values for bit flags used in the ident_t to describe the fields.
346 /// All enumeric elements are named and described in accordance with the code
347 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
348 enum OpenMPLocationFlags {
349   /// \brief Use trampoline for internal microtask.
350   OMP_IDENT_IMD = 0x01,
351   /// \brief Use c-style ident structure.
352   OMP_IDENT_KMPC = 0x02,
353   /// \brief Atomic reduction option for kmpc_reduce.
354   OMP_ATOMIC_REDUCE = 0x10,
355   /// \brief Explicit 'barrier' directive.
356   OMP_IDENT_BARRIER_EXPL = 0x20,
357   /// \brief Implicit barrier in code.
358   OMP_IDENT_BARRIER_IMPL = 0x40,
359   /// \brief Implicit barrier in 'for' directive.
360   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
361   /// \brief Implicit barrier in 'sections' directive.
362   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
363   /// \brief Implicit barrier in 'single' directive.
364   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
365 };
366 
367 /// \brief Describes ident structure that describes a source location.
368 /// All descriptions are taken from
369 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
370 /// Original structure:
371 /// typedef struct ident {
372 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
373 ///                                  see above  */
374 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
375 ///                                  KMP_IDENT_KMPC identifies this union
376 ///                                  member  */
377 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
378 ///                                  see above */
379 ///#if USE_ITT_BUILD
380 ///                            /*  but currently used for storing
381 ///                                region-specific ITT */
382 ///                            /*  contextual information. */
383 ///#endif /* USE_ITT_BUILD */
384 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
385 ///                                 C++  */
386 ///    char const *psource;    /**< String describing the source location.
387 ///                            The string is composed of semi-colon separated
388 //                             fields which describe the source file,
389 ///                            the function and a pair of line numbers that
390 ///                            delimit the construct.
391 ///                             */
392 /// } ident_t;
393 enum IdentFieldIndex {
394   /// \brief might be used in Fortran
395   IdentField_Reserved_1,
396   /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
397   IdentField_Flags,
398   /// \brief Not really used in Fortran any more
399   IdentField_Reserved_2,
400   /// \brief Source[4] in Fortran, do not use for C++
401   IdentField_Reserved_3,
402   /// \brief String describing the source location. The string is composed of
403   /// semi-colon separated fields which describe the source file, the function
404   /// and a pair of line numbers that delimit the construct.
405   IdentField_PSource
406 };
407 
408 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
409 /// the enum sched_type in kmp.h).
410 enum OpenMPSchedType {
411   /// \brief Lower bound for default (unordered) versions.
412   OMP_sch_lower = 32,
413   OMP_sch_static_chunked = 33,
414   OMP_sch_static = 34,
415   OMP_sch_dynamic_chunked = 35,
416   OMP_sch_guided_chunked = 36,
417   OMP_sch_runtime = 37,
418   OMP_sch_auto = 38,
419   /// \brief Lower bound for 'ordered' versions.
420   OMP_ord_lower = 64,
421   OMP_ord_static_chunked = 65,
422   OMP_ord_static = 66,
423   OMP_ord_dynamic_chunked = 67,
424   OMP_ord_guided_chunked = 68,
425   OMP_ord_runtime = 69,
426   OMP_ord_auto = 70,
427   OMP_sch_default = OMP_sch_static,
428   /// \brief dist_schedule types
429   OMP_dist_sch_static_chunked = 91,
430   OMP_dist_sch_static = 92,
431 };
432 
433 enum OpenMPRTLFunction {
434   /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
435   /// kmpc_micro microtask, ...);
436   OMPRTL__kmpc_fork_call,
437   /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
438   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
439   OMPRTL__kmpc_threadprivate_cached,
440   /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
441   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
442   OMPRTL__kmpc_threadprivate_register,
443   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
444   OMPRTL__kmpc_global_thread_num,
445   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
446   // kmp_critical_name *crit);
447   OMPRTL__kmpc_critical,
448   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
449   // global_tid, kmp_critical_name *crit, uintptr_t hint);
450   OMPRTL__kmpc_critical_with_hint,
451   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
452   // kmp_critical_name *crit);
453   OMPRTL__kmpc_end_critical,
454   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
455   // global_tid);
456   OMPRTL__kmpc_cancel_barrier,
457   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
458   OMPRTL__kmpc_barrier,
459   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
460   OMPRTL__kmpc_for_static_fini,
461   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
462   // global_tid);
463   OMPRTL__kmpc_serialized_parallel,
464   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
465   // global_tid);
466   OMPRTL__kmpc_end_serialized_parallel,
467   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
468   // kmp_int32 num_threads);
469   OMPRTL__kmpc_push_num_threads,
470   // Call to void __kmpc_flush(ident_t *loc);
471   OMPRTL__kmpc_flush,
472   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
473   OMPRTL__kmpc_master,
474   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
475   OMPRTL__kmpc_end_master,
476   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
477   // int end_part);
478   OMPRTL__kmpc_omp_taskyield,
479   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
480   OMPRTL__kmpc_single,
481   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
482   OMPRTL__kmpc_end_single,
483   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
484   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
485   // kmp_routine_entry_t *task_entry);
486   OMPRTL__kmpc_omp_task_alloc,
487   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
488   // new_task);
489   OMPRTL__kmpc_omp_task,
490   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
491   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
492   // kmp_int32 didit);
493   OMPRTL__kmpc_copyprivate,
494   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
495   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
496   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
497   OMPRTL__kmpc_reduce,
498   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
499   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
500   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
501   // *lck);
502   OMPRTL__kmpc_reduce_nowait,
503   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
504   // kmp_critical_name *lck);
505   OMPRTL__kmpc_end_reduce,
506   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
507   // kmp_critical_name *lck);
508   OMPRTL__kmpc_end_reduce_nowait,
509   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
510   // kmp_task_t * new_task);
511   OMPRTL__kmpc_omp_task_begin_if0,
512   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
513   // kmp_task_t * new_task);
514   OMPRTL__kmpc_omp_task_complete_if0,
515   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
516   OMPRTL__kmpc_ordered,
517   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
518   OMPRTL__kmpc_end_ordered,
519   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
520   // global_tid);
521   OMPRTL__kmpc_omp_taskwait,
522   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
523   OMPRTL__kmpc_taskgroup,
524   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
525   OMPRTL__kmpc_end_taskgroup,
526   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
527   // int proc_bind);
528   OMPRTL__kmpc_push_proc_bind,
529   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
530   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
531   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
532   OMPRTL__kmpc_omp_task_with_deps,
533   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
534   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
535   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
536   OMPRTL__kmpc_omp_wait_deps,
537   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
538   // global_tid, kmp_int32 cncl_kind);
539   OMPRTL__kmpc_cancellationpoint,
540   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
541   // kmp_int32 cncl_kind);
542   OMPRTL__kmpc_cancel,
543   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
544   // kmp_int32 num_teams, kmp_int32 thread_limit);
545   OMPRTL__kmpc_push_num_teams,
546   /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
547   /// kmpc_micro microtask, ...);
548   OMPRTL__kmpc_fork_teams,
549 
550   //
551   // Offloading related calls
552   //
553   // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
554   // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
555   // *arg_types);
556   OMPRTL__tgt_target,
557   // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
558   // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
559   // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
560   OMPRTL__tgt_target_teams,
561   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
562   OMPRTL__tgt_register_lib,
563   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
564   OMPRTL__tgt_unregister_lib,
565 };
566 
567 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
568 /// region.
569 class CleanupTy final : public EHScopeStack::Cleanup {
570   PrePostActionTy *Action;
571 
572 public:
573   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
574   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
575     if (!CGF.HaveInsertPoint())
576       return;
577     Action->Exit(CGF);
578   }
579 };
580 
581 } // anonymous namespace
582 
583 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
584   CodeGenFunction::RunCleanupsScope Scope(CGF);
585   if (PrePostAction) {
586     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
587     Callback(CodeGen, CGF, *PrePostAction);
588   } else {
589     PrePostActionTy Action;
590     Callback(CodeGen, CGF, Action);
591   }
592 }
593 
594 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
595   return CGF.EmitLoadOfPointerLValue(
596       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
597       getThreadIDVariable()->getType()->castAs<PointerType>());
598 }
599 
600 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
601   if (!CGF.HaveInsertPoint())
602     return;
603   // 1.2.2 OpenMP Language Terminology
604   // Structured block - An executable statement with a single entry at the
605   // top and a single exit at the bottom.
606   // The point of exit cannot be a branch out of the structured block.
607   // longjmp() and throw() must not violate the entry/exit criteria.
608   CGF.EHStack.pushTerminate();
609   CodeGen(CGF);
610   CGF.EHStack.popTerminate();
611 }
612 
613 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
614     CodeGenFunction &CGF) {
615   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
616                             getThreadIDVariable()->getType(),
617                             AlignmentSource::Decl);
618 }
619 
620 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
621     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
622   IdentTy = llvm::StructType::create(
623       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
624       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
625       CGM.Int8PtrTy /* psource */, nullptr);
626   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
627 
628   loadOffloadInfoMetadata();
629 }
630 
631 void CGOpenMPRuntime::clear() {
632   InternalVars.clear();
633 }
634 
635 static llvm::Function *
636 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
637                           const Expr *CombinerInitializer, const VarDecl *In,
638                           const VarDecl *Out, bool IsCombiner) {
639   // void .omp_combiner.(Ty *in, Ty *out);
640   auto &C = CGM.getContext();
641   QualType PtrTy = C.getPointerType(Ty).withRestrict();
642   FunctionArgList Args;
643   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
644                                /*Id=*/nullptr, PtrTy);
645   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
646                               /*Id=*/nullptr, PtrTy);
647   Args.push_back(&OmpOutParm);
648   Args.push_back(&OmpInParm);
649   auto &FnInfo =
650       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
651   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
652   auto *Fn = llvm::Function::Create(
653       FnTy, llvm::GlobalValue::InternalLinkage,
654       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
655   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
656   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
657   CodeGenFunction CGF(CGM);
658   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
659   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
660   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
661   CodeGenFunction::OMPPrivateScope Scope(CGF);
662   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
663   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
664     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
665         .getAddress();
666   });
667   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
668   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
669     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
670         .getAddress();
671   });
672   (void)Scope.Privatize();
673   CGF.EmitIgnoredExpr(CombinerInitializer);
674   Scope.ForceCleanup();
675   CGF.FinishFunction();
676   return Fn;
677 }
678 
679 void CGOpenMPRuntime::emitUserDefinedReduction(
680     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
681   if (UDRMap.count(D) > 0)
682     return;
683   auto &C = CGM.getContext();
684   if (!In || !Out) {
685     In = &C.Idents.get("omp_in");
686     Out = &C.Idents.get("omp_out");
687   }
688   llvm::Function *Combiner = emitCombinerOrInitializer(
689       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
690       cast<VarDecl>(D->lookup(Out).front()),
691       /*IsCombiner=*/true);
692   llvm::Function *Initializer = nullptr;
693   if (auto *Init = D->getInitializer()) {
694     if (!Priv || !Orig) {
695       Priv = &C.Idents.get("omp_priv");
696       Orig = &C.Idents.get("omp_orig");
697     }
698     Initializer = emitCombinerOrInitializer(
699         CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
700         cast<VarDecl>(D->lookup(Priv).front()),
701         /*IsCombiner=*/false);
702   }
703   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
704   if (CGF) {
705     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
706     Decls.second.push_back(D);
707   }
708 }
709 
710 std::pair<llvm::Function *, llvm::Function *>
711 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
712   auto I = UDRMap.find(D);
713   if (I != UDRMap.end())
714     return I->second;
715   emitUserDefinedReduction(/*CGF=*/nullptr, D);
716   return UDRMap.lookup(D);
717 }
718 
719 // Layout information for ident_t.
720 static CharUnits getIdentAlign(CodeGenModule &CGM) {
721   return CGM.getPointerAlign();
722 }
723 static CharUnits getIdentSize(CodeGenModule &CGM) {
724   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
725   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
726 }
727 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
728   // All the fields except the last are i32, so this works beautifully.
729   return unsigned(Field) * CharUnits::fromQuantity(4);
730 }
731 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
732                                    IdentFieldIndex Field,
733                                    const llvm::Twine &Name = "") {
734   auto Offset = getOffsetOfIdentField(Field);
735   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
736 }
737 
738 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
739     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
740     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
741   assert(ThreadIDVar->getType()->isPointerType() &&
742          "thread id variable must be of type kmp_int32 *");
743   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
744   CodeGenFunction CGF(CGM, true);
745   bool HasCancel = false;
746   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
747     HasCancel = OPD->hasCancel();
748   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
749     HasCancel = OPSD->hasCancel();
750   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
751     HasCancel = OPFD->hasCancel();
752   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
753                                     HasCancel);
754   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
755   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
756 }
757 
758 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
759     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
760     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
761   assert(!ThreadIDVar->getType()->isPointerType() &&
762          "thread id variable must be of type kmp_int32 for tasks");
763   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
764   CodeGenFunction CGF(CGM, true);
765   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
766                                         InnermostKind,
767                                         cast<OMPTaskDirective>(D).hasCancel());
768   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
769   return CGF.GenerateCapturedStmtFunction(*CS);
770 }
771 
772 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
773   CharUnits Align = getIdentAlign(CGM);
774   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
775   if (!Entry) {
776     if (!DefaultOpenMPPSource) {
777       // Initialize default location for psource field of ident_t structure of
778       // all ident_t objects. Format is ";file;function;line;column;;".
779       // Taken from
780       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
781       DefaultOpenMPPSource =
782           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
783       DefaultOpenMPPSource =
784           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
785     }
786     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
787         CGM.getModule(), IdentTy, /*isConstant*/ true,
788         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
789     DefaultOpenMPLocation->setUnnamedAddr(true);
790     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
791 
792     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
793     llvm::Constant *Values[] = {Zero,
794                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
795                                 Zero, Zero, DefaultOpenMPPSource};
796     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
797     DefaultOpenMPLocation->setInitializer(Init);
798     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
799   }
800   return Address(Entry, Align);
801 }
802 
803 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
804                                                  SourceLocation Loc,
805                                                  unsigned Flags) {
806   Flags |= OMP_IDENT_KMPC;
807   // If no debug info is generated - return global default location.
808   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
809       Loc.isInvalid())
810     return getOrCreateDefaultLocation(Flags).getPointer();
811 
812   assert(CGF.CurFn && "No function in current CodeGenFunction.");
813 
814   Address LocValue = Address::invalid();
815   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
816   if (I != OpenMPLocThreadIDMap.end())
817     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
818 
819   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
820   // GetOpenMPThreadID was called before this routine.
821   if (!LocValue.isValid()) {
822     // Generate "ident_t .kmpc_loc.addr;"
823     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
824                                       ".kmpc_loc.addr");
825     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
826     Elem.second.DebugLoc = AI.getPointer();
827     LocValue = AI;
828 
829     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
830     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
831     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
832                              CGM.getSize(getIdentSize(CGF.CGM)));
833   }
834 
835   // char **psource = &.kmpc_loc_<flags>.addr.psource;
836   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
837 
838   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
839   if (OMPDebugLoc == nullptr) {
840     SmallString<128> Buffer2;
841     llvm::raw_svector_ostream OS2(Buffer2);
842     // Build debug location
843     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
844     OS2 << ";" << PLoc.getFilename() << ";";
845     if (const FunctionDecl *FD =
846             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
847       OS2 << FD->getQualifiedNameAsString();
848     }
849     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
850     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
851     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
852   }
853   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
854   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
855 
856   // Our callers always pass this to a runtime function, so for
857   // convenience, go ahead and return a naked pointer.
858   return LocValue.getPointer();
859 }
860 
861 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
862                                           SourceLocation Loc) {
863   assert(CGF.CurFn && "No function in current CodeGenFunction.");
864 
865   llvm::Value *ThreadID = nullptr;
866   // Check whether we've already cached a load of the thread id in this
867   // function.
868   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
869   if (I != OpenMPLocThreadIDMap.end()) {
870     ThreadID = I->second.ThreadID;
871     if (ThreadID != nullptr)
872       return ThreadID;
873   }
874   if (auto *OMPRegionInfo =
875           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
876     if (OMPRegionInfo->getThreadIDVariable()) {
877       // Check if this an outlined function with thread id passed as argument.
878       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
879       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
880       // If value loaded in entry block, cache it and use it everywhere in
881       // function.
882       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
883         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
884         Elem.second.ThreadID = ThreadID;
885       }
886       return ThreadID;
887     }
888   }
889 
890   // This is not an outlined function region - need to call __kmpc_int32
891   // kmpc_global_thread_num(ident_t *loc).
892   // Generate thread id value and cache this value for use across the
893   // function.
894   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
895   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
896   ThreadID =
897       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
898                           emitUpdateLocation(CGF, Loc));
899   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
900   Elem.second.ThreadID = ThreadID;
901   return ThreadID;
902 }
903 
904 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
905   assert(CGF.CurFn && "No function in current CodeGenFunction.");
906   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
907     OpenMPLocThreadIDMap.erase(CGF.CurFn);
908   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
909     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
910       UDRMap.erase(D);
911     }
912     FunctionUDRMap.erase(CGF.CurFn);
913   }
914 }
915 
916 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
917   if (!IdentTy) {
918   }
919   return llvm::PointerType::getUnqual(IdentTy);
920 }
921 
922 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
923   if (!Kmpc_MicroTy) {
924     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
925     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
926                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
927     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
928   }
929   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
930 }
931 
932 llvm::Constant *
933 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
934   llvm::Constant *RTLFn = nullptr;
935   switch (static_cast<OpenMPRTLFunction>(Function)) {
936   case OMPRTL__kmpc_fork_call: {
937     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
938     // microtask, ...);
939     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
940                                 getKmpc_MicroPointerTy()};
941     llvm::FunctionType *FnTy =
942         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
943     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
944     break;
945   }
946   case OMPRTL__kmpc_global_thread_num: {
947     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
948     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
949     llvm::FunctionType *FnTy =
950         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
951     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
952     break;
953   }
954   case OMPRTL__kmpc_threadprivate_cached: {
955     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
956     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
957     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
958                                 CGM.VoidPtrTy, CGM.SizeTy,
959                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
960     llvm::FunctionType *FnTy =
961         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
962     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
963     break;
964   }
965   case OMPRTL__kmpc_critical: {
966     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
967     // kmp_critical_name *crit);
968     llvm::Type *TypeParams[] = {
969         getIdentTyPointerTy(), CGM.Int32Ty,
970         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
971     llvm::FunctionType *FnTy =
972         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
973     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
974     break;
975   }
976   case OMPRTL__kmpc_critical_with_hint: {
977     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
978     // kmp_critical_name *crit, uintptr_t hint);
979     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
980                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
981                                 CGM.IntPtrTy};
982     llvm::FunctionType *FnTy =
983         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
984     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
985     break;
986   }
987   case OMPRTL__kmpc_threadprivate_register: {
988     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
989     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
990     // typedef void *(*kmpc_ctor)(void *);
991     auto KmpcCtorTy =
992         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
993                                 /*isVarArg*/ false)->getPointerTo();
994     // typedef void *(*kmpc_cctor)(void *, void *);
995     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
996     auto KmpcCopyCtorTy =
997         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
998                                 /*isVarArg*/ false)->getPointerTo();
999     // typedef void (*kmpc_dtor)(void *);
1000     auto KmpcDtorTy =
1001         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1002             ->getPointerTo();
1003     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1004                               KmpcCopyCtorTy, KmpcDtorTy};
1005     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1006                                         /*isVarArg*/ false);
1007     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1008     break;
1009   }
1010   case OMPRTL__kmpc_end_critical: {
1011     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1012     // kmp_critical_name *crit);
1013     llvm::Type *TypeParams[] = {
1014         getIdentTyPointerTy(), CGM.Int32Ty,
1015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1016     llvm::FunctionType *FnTy =
1017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1018     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1019     break;
1020   }
1021   case OMPRTL__kmpc_cancel_barrier: {
1022     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1023     // global_tid);
1024     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1025     llvm::FunctionType *FnTy =
1026         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1027     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1028     break;
1029   }
1030   case OMPRTL__kmpc_barrier: {
1031     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1032     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1033     llvm::FunctionType *FnTy =
1034         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1035     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1036     break;
1037   }
1038   case OMPRTL__kmpc_for_static_fini: {
1039     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1040     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1041     llvm::FunctionType *FnTy =
1042         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1043     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1044     break;
1045   }
1046   case OMPRTL__kmpc_push_num_threads: {
1047     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1048     // kmp_int32 num_threads)
1049     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1050                                 CGM.Int32Ty};
1051     llvm::FunctionType *FnTy =
1052         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1053     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1054     break;
1055   }
1056   case OMPRTL__kmpc_serialized_parallel: {
1057     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1058     // global_tid);
1059     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1060     llvm::FunctionType *FnTy =
1061         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1062     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1063     break;
1064   }
1065   case OMPRTL__kmpc_end_serialized_parallel: {
1066     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1067     // global_tid);
1068     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1069     llvm::FunctionType *FnTy =
1070         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1071     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1072     break;
1073   }
1074   case OMPRTL__kmpc_flush: {
1075     // Build void __kmpc_flush(ident_t *loc);
1076     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1077     llvm::FunctionType *FnTy =
1078         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1079     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1080     break;
1081   }
1082   case OMPRTL__kmpc_master: {
1083     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1084     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1085     llvm::FunctionType *FnTy =
1086         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1087     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1088     break;
1089   }
1090   case OMPRTL__kmpc_end_master: {
1091     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1092     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1093     llvm::FunctionType *FnTy =
1094         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1095     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1096     break;
1097   }
1098   case OMPRTL__kmpc_omp_taskyield: {
1099     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1100     // int end_part);
1101     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1102     llvm::FunctionType *FnTy =
1103         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1104     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1105     break;
1106   }
1107   case OMPRTL__kmpc_single: {
1108     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1109     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1110     llvm::FunctionType *FnTy =
1111         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1112     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1113     break;
1114   }
1115   case OMPRTL__kmpc_end_single: {
1116     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1117     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1118     llvm::FunctionType *FnTy =
1119         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1120     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1121     break;
1122   }
1123   case OMPRTL__kmpc_omp_task_alloc: {
1124     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1125     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1126     // kmp_routine_entry_t *task_entry);
1127     assert(KmpRoutineEntryPtrTy != nullptr &&
1128            "Type kmp_routine_entry_t must be created.");
1129     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1130                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1131     // Return void * and then cast to particular kmp_task_t type.
1132     llvm::FunctionType *FnTy =
1133         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1134     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1135     break;
1136   }
1137   case OMPRTL__kmpc_omp_task: {
1138     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1139     // *new_task);
1140     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1141                                 CGM.VoidPtrTy};
1142     llvm::FunctionType *FnTy =
1143         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1144     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1145     break;
1146   }
1147   case OMPRTL__kmpc_copyprivate: {
1148     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1149     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1150     // kmp_int32 didit);
1151     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1152     auto *CpyFnTy =
1153         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1154     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1155                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1156                                 CGM.Int32Ty};
1157     llvm::FunctionType *FnTy =
1158         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1159     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1160     break;
1161   }
1162   case OMPRTL__kmpc_reduce: {
1163     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1164     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1165     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1166     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1167     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1168                                                /*isVarArg=*/false);
1169     llvm::Type *TypeParams[] = {
1170         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1171         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1172         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1173     llvm::FunctionType *FnTy =
1174         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1175     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1176     break;
1177   }
1178   case OMPRTL__kmpc_reduce_nowait: {
1179     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1180     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1181     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1182     // *lck);
1183     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1184     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1185                                                /*isVarArg=*/false);
1186     llvm::Type *TypeParams[] = {
1187         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1188         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1189         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1190     llvm::FunctionType *FnTy =
1191         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1192     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1193     break;
1194   }
1195   case OMPRTL__kmpc_end_reduce: {
1196     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1197     // kmp_critical_name *lck);
1198     llvm::Type *TypeParams[] = {
1199         getIdentTyPointerTy(), CGM.Int32Ty,
1200         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1201     llvm::FunctionType *FnTy =
1202         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1203     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1204     break;
1205   }
1206   case OMPRTL__kmpc_end_reduce_nowait: {
1207     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1208     // kmp_critical_name *lck);
1209     llvm::Type *TypeParams[] = {
1210         getIdentTyPointerTy(), CGM.Int32Ty,
1211         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1212     llvm::FunctionType *FnTy =
1213         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1214     RTLFn =
1215         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1216     break;
1217   }
1218   case OMPRTL__kmpc_omp_task_begin_if0: {
1219     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1220     // *new_task);
1221     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1222                                 CGM.VoidPtrTy};
1223     llvm::FunctionType *FnTy =
1224         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1225     RTLFn =
1226         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1227     break;
1228   }
1229   case OMPRTL__kmpc_omp_task_complete_if0: {
1230     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1231     // *new_task);
1232     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1233                                 CGM.VoidPtrTy};
1234     llvm::FunctionType *FnTy =
1235         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1236     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1237                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1238     break;
1239   }
1240   case OMPRTL__kmpc_ordered: {
1241     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1242     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1243     llvm::FunctionType *FnTy =
1244         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1245     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1246     break;
1247   }
1248   case OMPRTL__kmpc_end_ordered: {
1249     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1250     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1251     llvm::FunctionType *FnTy =
1252         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1253     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1254     break;
1255   }
1256   case OMPRTL__kmpc_omp_taskwait: {
1257     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1258     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1259     llvm::FunctionType *FnTy =
1260         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1261     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1262     break;
1263   }
1264   case OMPRTL__kmpc_taskgroup: {
1265     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1266     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1267     llvm::FunctionType *FnTy =
1268         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1269     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1270     break;
1271   }
1272   case OMPRTL__kmpc_end_taskgroup: {
1273     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1274     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1275     llvm::FunctionType *FnTy =
1276         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1277     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1278     break;
1279   }
1280   case OMPRTL__kmpc_push_proc_bind: {
1281     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1282     // int proc_bind)
1283     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1284     llvm::FunctionType *FnTy =
1285         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1286     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1287     break;
1288   }
1289   case OMPRTL__kmpc_omp_task_with_deps: {
1290     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1291     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1292     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1293     llvm::Type *TypeParams[] = {
1294         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1295         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1296     llvm::FunctionType *FnTy =
1297         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1298     RTLFn =
1299         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1300     break;
1301   }
1302   case OMPRTL__kmpc_omp_wait_deps: {
1303     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1304     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1305     // kmp_depend_info_t *noalias_dep_list);
1306     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1307                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1308                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1309     llvm::FunctionType *FnTy =
1310         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1311     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1312     break;
1313   }
1314   case OMPRTL__kmpc_cancellationpoint: {
1315     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1316     // global_tid, kmp_int32 cncl_kind)
1317     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1318     llvm::FunctionType *FnTy =
1319         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1320     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1321     break;
1322   }
1323   case OMPRTL__kmpc_cancel: {
1324     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1325     // kmp_int32 cncl_kind)
1326     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1327     llvm::FunctionType *FnTy =
1328         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1329     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1330     break;
1331   }
1332   case OMPRTL__kmpc_push_num_teams: {
1333     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
1334     // kmp_int32 num_teams, kmp_int32 num_threads)
1335     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1336         CGM.Int32Ty};
1337     llvm::FunctionType *FnTy =
1338         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1339     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1340     break;
1341   }
1342   case OMPRTL__kmpc_fork_teams: {
1343     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1344     // microtask, ...);
1345     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1346                                 getKmpc_MicroPointerTy()};
1347     llvm::FunctionType *FnTy =
1348         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1349     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1350     break;
1351   }
1352   case OMPRTL__tgt_target: {
1353     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1354     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1355     // *arg_types);
1356     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1357                                 CGM.VoidPtrTy,
1358                                 CGM.Int32Ty,
1359                                 CGM.VoidPtrPtrTy,
1360                                 CGM.VoidPtrPtrTy,
1361                                 CGM.SizeTy->getPointerTo(),
1362                                 CGM.Int32Ty->getPointerTo()};
1363     llvm::FunctionType *FnTy =
1364         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1365     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1366     break;
1367   }
1368   case OMPRTL__tgt_target_teams: {
1369     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1370     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1371     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1372     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1373                                 CGM.VoidPtrTy,
1374                                 CGM.Int32Ty,
1375                                 CGM.VoidPtrPtrTy,
1376                                 CGM.VoidPtrPtrTy,
1377                                 CGM.SizeTy->getPointerTo(),
1378                                 CGM.Int32Ty->getPointerTo(),
1379                                 CGM.Int32Ty,
1380                                 CGM.Int32Ty};
1381     llvm::FunctionType *FnTy =
1382         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1383     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1384     break;
1385   }
1386   case OMPRTL__tgt_register_lib: {
1387     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1388     QualType ParamTy =
1389         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1390     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1391     llvm::FunctionType *FnTy =
1392         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1393     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1394     break;
1395   }
1396   case OMPRTL__tgt_unregister_lib: {
1397     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1398     QualType ParamTy =
1399         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1400     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1401     llvm::FunctionType *FnTy =
1402         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1403     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1404     break;
1405   }
1406   }
1407   assert(RTLFn && "Unable to find OpenMP runtime function");
1408   return RTLFn;
1409 }
1410 
1411 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1412                                                              bool IVSigned) {
1413   assert((IVSize == 32 || IVSize == 64) &&
1414          "IV size is not compatible with the omp runtime");
1415   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1416                                        : "__kmpc_for_static_init_4u")
1417                            : (IVSigned ? "__kmpc_for_static_init_8"
1418                                        : "__kmpc_for_static_init_8u");
1419   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1420   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1421   llvm::Type *TypeParams[] = {
1422     getIdentTyPointerTy(),                     // loc
1423     CGM.Int32Ty,                               // tid
1424     CGM.Int32Ty,                               // schedtype
1425     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1426     PtrTy,                                     // p_lower
1427     PtrTy,                                     // p_upper
1428     PtrTy,                                     // p_stride
1429     ITy,                                       // incr
1430     ITy                                        // chunk
1431   };
1432   llvm::FunctionType *FnTy =
1433       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1434   return CGM.CreateRuntimeFunction(FnTy, Name);
1435 }
1436 
1437 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1438                                                             bool IVSigned) {
1439   assert((IVSize == 32 || IVSize == 64) &&
1440          "IV size is not compatible with the omp runtime");
1441   auto Name =
1442       IVSize == 32
1443           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1444           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1445   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1446   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1447                                CGM.Int32Ty,           // tid
1448                                CGM.Int32Ty,           // schedtype
1449                                ITy,                   // lower
1450                                ITy,                   // upper
1451                                ITy,                   // stride
1452                                ITy                    // chunk
1453   };
1454   llvm::FunctionType *FnTy =
1455       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1456   return CGM.CreateRuntimeFunction(FnTy, Name);
1457 }
1458 
1459 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1460                                                             bool IVSigned) {
1461   assert((IVSize == 32 || IVSize == 64) &&
1462          "IV size is not compatible with the omp runtime");
1463   auto Name =
1464       IVSize == 32
1465           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1466           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1467   llvm::Type *TypeParams[] = {
1468       getIdentTyPointerTy(), // loc
1469       CGM.Int32Ty,           // tid
1470   };
1471   llvm::FunctionType *FnTy =
1472       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1473   return CGM.CreateRuntimeFunction(FnTy, Name);
1474 }
1475 
1476 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1477                                                             bool IVSigned) {
1478   assert((IVSize == 32 || IVSize == 64) &&
1479          "IV size is not compatible with the omp runtime");
1480   auto Name =
1481       IVSize == 32
1482           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1483           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1484   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1485   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1486   llvm::Type *TypeParams[] = {
1487     getIdentTyPointerTy(),                     // loc
1488     CGM.Int32Ty,                               // tid
1489     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1490     PtrTy,                                     // p_lower
1491     PtrTy,                                     // p_upper
1492     PtrTy                                      // p_stride
1493   };
1494   llvm::FunctionType *FnTy =
1495       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1496   return CGM.CreateRuntimeFunction(FnTy, Name);
1497 }
1498 
1499 llvm::Constant *
1500 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1501   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1502          !CGM.getContext().getTargetInfo().isTLSSupported());
1503   // Lookup the entry, lazily creating it if necessary.
1504   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1505                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1506 }
1507 
1508 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1509                                                 const VarDecl *VD,
1510                                                 Address VDAddr,
1511                                                 SourceLocation Loc) {
1512   if (CGM.getLangOpts().OpenMPUseTLS &&
1513       CGM.getContext().getTargetInfo().isTLSSupported())
1514     return VDAddr;
1515 
1516   auto VarTy = VDAddr.getElementType();
1517   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1518                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1519                                                        CGM.Int8PtrTy),
1520                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1521                          getOrCreateThreadPrivateCache(VD)};
1522   return Address(CGF.EmitRuntimeCall(
1523       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1524                  VDAddr.getAlignment());
1525 }
1526 
1527 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1528     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1529     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1530   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1531   // library.
1532   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1533   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1534                       OMPLoc);
1535   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1536   // to register constructor/destructor for variable.
1537   llvm::Value *Args[] = {OMPLoc,
1538                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1539                                                        CGM.VoidPtrTy),
1540                          Ctor, CopyCtor, Dtor};
1541   CGF.EmitRuntimeCall(
1542       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1543 }
1544 
1545 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1546     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1547     bool PerformInit, CodeGenFunction *CGF) {
1548   if (CGM.getLangOpts().OpenMPUseTLS &&
1549       CGM.getContext().getTargetInfo().isTLSSupported())
1550     return nullptr;
1551 
1552   VD = VD->getDefinition(CGM.getContext());
1553   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1554     ThreadPrivateWithDefinition.insert(VD);
1555     QualType ASTTy = VD->getType();
1556 
1557     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1558     auto Init = VD->getAnyInitializer();
1559     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1560       // Generate function that re-emits the declaration's initializer into the
1561       // threadprivate copy of the variable VD
1562       CodeGenFunction CtorCGF(CGM);
1563       FunctionArgList Args;
1564       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1565                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1566       Args.push_back(&Dst);
1567 
1568       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1569           CGM.getContext().VoidPtrTy, Args);
1570       auto FTy = CGM.getTypes().GetFunctionType(FI);
1571       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1572           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1573       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1574                             Args, SourceLocation());
1575       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1576           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1577           CGM.getContext().VoidPtrTy, Dst.getLocation());
1578       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1579       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1580                                              CtorCGF.ConvertTypeForMem(ASTTy));
1581       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1582                                /*IsInitializer=*/true);
1583       ArgVal = CtorCGF.EmitLoadOfScalar(
1584           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1585           CGM.getContext().VoidPtrTy, Dst.getLocation());
1586       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1587       CtorCGF.FinishFunction();
1588       Ctor = Fn;
1589     }
1590     if (VD->getType().isDestructedType() != QualType::DK_none) {
1591       // Generate function that emits destructor call for the threadprivate copy
1592       // of the variable VD
1593       CodeGenFunction DtorCGF(CGM);
1594       FunctionArgList Args;
1595       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1596                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1597       Args.push_back(&Dst);
1598 
1599       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1600           CGM.getContext().VoidTy, Args);
1601       auto FTy = CGM.getTypes().GetFunctionType(FI);
1602       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1603           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1604       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1605                             SourceLocation());
1606       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1607           DtorCGF.GetAddrOfLocalVar(&Dst),
1608           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1609       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1610                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1611                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1612       DtorCGF.FinishFunction();
1613       Dtor = Fn;
1614     }
1615     // Do not emit init function if it is not required.
1616     if (!Ctor && !Dtor)
1617       return nullptr;
1618 
1619     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1620     auto CopyCtorTy =
1621         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1622                                 /*isVarArg=*/false)->getPointerTo();
1623     // Copying constructor for the threadprivate variable.
1624     // Must be NULL - reserved by runtime, but currently it requires that this
1625     // parameter is always NULL. Otherwise it fires assertion.
1626     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1627     if (Ctor == nullptr) {
1628       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1629                                             /*isVarArg=*/false)->getPointerTo();
1630       Ctor = llvm::Constant::getNullValue(CtorTy);
1631     }
1632     if (Dtor == nullptr) {
1633       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1634                                             /*isVarArg=*/false)->getPointerTo();
1635       Dtor = llvm::Constant::getNullValue(DtorTy);
1636     }
1637     if (!CGF) {
1638       auto InitFunctionTy =
1639           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1640       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1641           InitFunctionTy, ".__omp_threadprivate_init_.",
1642           CGM.getTypes().arrangeNullaryFunction());
1643       CodeGenFunction InitCGF(CGM);
1644       FunctionArgList ArgList;
1645       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1646                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1647                             Loc);
1648       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1649       InitCGF.FinishFunction();
1650       return InitFunction;
1651     }
1652     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1653   }
1654   return nullptr;
1655 }
1656 
1657 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1658 /// function. Here is the logic:
1659 /// if (Cond) {
1660 ///   ThenGen();
1661 /// } else {
1662 ///   ElseGen();
1663 /// }
1664 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1665                             const RegionCodeGenTy &ThenGen,
1666                             const RegionCodeGenTy &ElseGen) {
1667   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1668 
1669   // If the condition constant folds and can be elided, try to avoid emitting
1670   // the condition and the dead arm of the if/else.
1671   bool CondConstant;
1672   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1673     if (CondConstant)
1674       ThenGen(CGF);
1675     else
1676       ElseGen(CGF);
1677     return;
1678   }
1679 
1680   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1681   // emit the conditional branch.
1682   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1683   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1684   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1685   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1686 
1687   // Emit the 'then' code.
1688   CGF.EmitBlock(ThenBlock);
1689   ThenGen(CGF);
1690   CGF.EmitBranch(ContBlock);
1691   // Emit the 'else' code if present.
1692   // There is no need to emit line number for unconditional branch.
1693   (void)ApplyDebugLocation::CreateEmpty(CGF);
1694   CGF.EmitBlock(ElseBlock);
1695   ElseGen(CGF);
1696   // There is no need to emit line number for unconditional branch.
1697   (void)ApplyDebugLocation::CreateEmpty(CGF);
1698   CGF.EmitBranch(ContBlock);
1699   // Emit the continuation block for code after the if.
1700   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1701 }
1702 
1703 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1704                                        llvm::Value *OutlinedFn,
1705                                        ArrayRef<llvm::Value *> CapturedVars,
1706                                        const Expr *IfCond) {
1707   if (!CGF.HaveInsertPoint())
1708     return;
1709   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1710   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1711                                                      PrePostActionTy &) {
1712     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1713     auto &RT = CGF.CGM.getOpenMPRuntime();
1714     llvm::Value *Args[] = {
1715         RTLoc,
1716         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1717         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1718     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1719     RealArgs.append(std::begin(Args), std::end(Args));
1720     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1721 
1722     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1723     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1724   };
1725   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1726                                                           PrePostActionTy &) {
1727     auto &RT = CGF.CGM.getOpenMPRuntime();
1728     auto ThreadID = RT.getThreadID(CGF, Loc);
1729     // Build calls:
1730     // __kmpc_serialized_parallel(&Loc, GTid);
1731     llvm::Value *Args[] = {RTLoc, ThreadID};
1732     CGF.EmitRuntimeCall(
1733         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1734 
1735     // OutlinedFn(&GTid, &zero, CapturedStruct);
1736     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1737     Address ZeroAddr =
1738         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1739                              /*Name*/ ".zero.addr");
1740     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1741     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1742     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1743     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1744     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1745     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1746 
1747     // __kmpc_end_serialized_parallel(&Loc, GTid);
1748     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1749     CGF.EmitRuntimeCall(
1750         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1751         EndArgs);
1752   };
1753   if (IfCond)
1754     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1755   else {
1756     RegionCodeGenTy ThenRCG(ThenGen);
1757     ThenRCG(CGF);
1758   }
1759 }
1760 
1761 // If we're inside an (outlined) parallel region, use the region info's
1762 // thread-ID variable (it is passed in a first argument of the outlined function
1763 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1764 // regular serial code region, get thread ID by calling kmp_int32
1765 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1766 // return the address of that temp.
1767 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1768                                              SourceLocation Loc) {
1769   if (auto *OMPRegionInfo =
1770           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1771     if (OMPRegionInfo->getThreadIDVariable())
1772       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1773 
1774   auto ThreadID = getThreadID(CGF, Loc);
1775   auto Int32Ty =
1776       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1777   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1778   CGF.EmitStoreOfScalar(ThreadID,
1779                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1780 
1781   return ThreadIDTemp;
1782 }
1783 
1784 llvm::Constant *
1785 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1786                                              const llvm::Twine &Name) {
1787   SmallString<256> Buffer;
1788   llvm::raw_svector_ostream Out(Buffer);
1789   Out << Name;
1790   auto RuntimeName = Out.str();
1791   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1792   if (Elem.second) {
1793     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1794            "OMP internal variable has different type than requested");
1795     return &*Elem.second;
1796   }
1797 
1798   return Elem.second = new llvm::GlobalVariable(
1799              CGM.getModule(), Ty, /*IsConstant*/ false,
1800              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1801              Elem.first());
1802 }
1803 
1804 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1805   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1806   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1807 }
1808 
1809 namespace {
1810 /// Common pre(post)-action for different OpenMP constructs.
1811 class CommonActionTy final : public PrePostActionTy {
1812   llvm::Value *EnterCallee;
1813   ArrayRef<llvm::Value *> EnterArgs;
1814   llvm::Value *ExitCallee;
1815   ArrayRef<llvm::Value *> ExitArgs;
1816   bool Conditional;
1817   llvm::BasicBlock *ContBlock = nullptr;
1818 
1819 public:
1820   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
1821                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
1822                  bool Conditional = false)
1823       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1824         ExitArgs(ExitArgs), Conditional(Conditional) {}
1825   void Enter(CodeGenFunction &CGF) override {
1826     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1827     if (Conditional) {
1828       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1829       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1830       ContBlock = CGF.createBasicBlock("omp_if.end");
1831       // Generate the branch (If-stmt)
1832       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1833       CGF.EmitBlock(ThenBlock);
1834     }
1835   }
1836   void Done(CodeGenFunction &CGF) {
1837     // Emit the rest of blocks/branches
1838     CGF.EmitBranch(ContBlock);
1839     CGF.EmitBlock(ContBlock, true);
1840   }
1841   void Exit(CodeGenFunction &CGF) override {
1842     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1843   }
1844 };
1845 } // anonymous namespace
1846 
1847 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1848                                          StringRef CriticalName,
1849                                          const RegionCodeGenTy &CriticalOpGen,
1850                                          SourceLocation Loc, const Expr *Hint) {
1851   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1852   // CriticalOpGen();
1853   // __kmpc_end_critical(ident_t *, gtid, Lock);
1854   // Prepare arguments and build a call to __kmpc_critical
1855   if (!CGF.HaveInsertPoint())
1856     return;
1857   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1858                          getCriticalRegionLock(CriticalName)};
1859   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
1860                                                 std::end(Args));
1861   if (Hint) {
1862     EnterArgs.push_back(CGF.Builder.CreateIntCast(
1863         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
1864   }
1865   CommonActionTy Action(
1866       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
1867                                  : OMPRTL__kmpc_critical),
1868       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
1869   CriticalOpGen.setAction(Action);
1870   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1871 }
1872 
1873 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1874                                        const RegionCodeGenTy &MasterOpGen,
1875                                        SourceLocation Loc) {
1876   if (!CGF.HaveInsertPoint())
1877     return;
1878   // if(__kmpc_master(ident_t *, gtid)) {
1879   //   MasterOpGen();
1880   //   __kmpc_end_master(ident_t *, gtid);
1881   // }
1882   // Prepare arguments and build a call to __kmpc_master
1883   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1884   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
1885                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
1886                         /*Conditional=*/true);
1887   MasterOpGen.setAction(Action);
1888   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
1889   Action.Done(CGF);
1890 }
1891 
1892 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1893                                         SourceLocation Loc) {
1894   if (!CGF.HaveInsertPoint())
1895     return;
1896   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1897   llvm::Value *Args[] = {
1898       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1899       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1900   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1901 }
1902 
1903 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1904                                           const RegionCodeGenTy &TaskgroupOpGen,
1905                                           SourceLocation Loc) {
1906   if (!CGF.HaveInsertPoint())
1907     return;
1908   // __kmpc_taskgroup(ident_t *, gtid);
1909   // TaskgroupOpGen();
1910   // __kmpc_end_taskgroup(ident_t *, gtid);
1911   // Prepare arguments and build a call to __kmpc_taskgroup
1912   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1913   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
1914                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1915                         Args);
1916   TaskgroupOpGen.setAction(Action);
1917   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1918 }
1919 
1920 /// Given an array of pointers to variables, project the address of a
1921 /// given variable.
1922 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1923                                       unsigned Index, const VarDecl *Var) {
1924   // Pull out the pointer to the variable.
1925   Address PtrAddr =
1926       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1927   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1928 
1929   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1930   Addr = CGF.Builder.CreateElementBitCast(
1931       Addr, CGF.ConvertTypeForMem(Var->getType()));
1932   return Addr;
1933 }
1934 
1935 static llvm::Value *emitCopyprivateCopyFunction(
1936     CodeGenModule &CGM, llvm::Type *ArgsType,
1937     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1938     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1939   auto &C = CGM.getContext();
1940   // void copy_func(void *LHSArg, void *RHSArg);
1941   FunctionArgList Args;
1942   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1943                            C.VoidPtrTy);
1944   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1945                            C.VoidPtrTy);
1946   Args.push_back(&LHSArg);
1947   Args.push_back(&RHSArg);
1948   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1949   auto *Fn = llvm::Function::Create(
1950       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1951       ".omp.copyprivate.copy_func", &CGM.getModule());
1952   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
1953   CodeGenFunction CGF(CGM);
1954   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1955   // Dest = (void*[n])(LHSArg);
1956   // Src = (void*[n])(RHSArg);
1957   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1958       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1959       ArgsType), CGF.getPointerAlign());
1960   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1961       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1962       ArgsType), CGF.getPointerAlign());
1963   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1964   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1965   // ...
1966   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1967   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1968     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1969     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1970 
1971     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1972     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1973 
1974     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1975     QualType Type = VD->getType();
1976     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1977   }
1978   CGF.FinishFunction();
1979   return Fn;
1980 }
1981 
1982 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1983                                        const RegionCodeGenTy &SingleOpGen,
1984                                        SourceLocation Loc,
1985                                        ArrayRef<const Expr *> CopyprivateVars,
1986                                        ArrayRef<const Expr *> SrcExprs,
1987                                        ArrayRef<const Expr *> DstExprs,
1988                                        ArrayRef<const Expr *> AssignmentOps) {
1989   if (!CGF.HaveInsertPoint())
1990     return;
1991   assert(CopyprivateVars.size() == SrcExprs.size() &&
1992          CopyprivateVars.size() == DstExprs.size() &&
1993          CopyprivateVars.size() == AssignmentOps.size());
1994   auto &C = CGM.getContext();
1995   // int32 did_it = 0;
1996   // if(__kmpc_single(ident_t *, gtid)) {
1997   //   SingleOpGen();
1998   //   __kmpc_end_single(ident_t *, gtid);
1999   //   did_it = 1;
2000   // }
2001   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2002   // <copy_func>, did_it);
2003 
2004   Address DidIt = Address::invalid();
2005   if (!CopyprivateVars.empty()) {
2006     // int32 did_it = 0;
2007     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2008     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2009     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2010   }
2011   // Prepare arguments and build a call to __kmpc_single
2012   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2013   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2014                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2015                         /*Conditional=*/true);
2016   SingleOpGen.setAction(Action);
2017   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2018   if (DidIt.isValid()) {
2019     // did_it = 1;
2020     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2021   }
2022   Action.Done(CGF);
2023   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2024   // <copy_func>, did_it);
2025   if (DidIt.isValid()) {
2026     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2027     auto CopyprivateArrayTy =
2028         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2029                                /*IndexTypeQuals=*/0);
2030     // Create a list of all private variables for copyprivate.
2031     Address CopyprivateList =
2032         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2033     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2034       Address Elem = CGF.Builder.CreateConstArrayGEP(
2035           CopyprivateList, I, CGF.getPointerSize());
2036       CGF.Builder.CreateStore(
2037           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2038               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2039           Elem);
2040     }
2041     // Build function that copies private values from single region to all other
2042     // threads in the corresponding parallel region.
2043     auto *CpyFn = emitCopyprivateCopyFunction(
2044         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2045         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2046     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2047     Address CL =
2048       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2049                                                       CGF.VoidPtrTy);
2050     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2051     llvm::Value *Args[] = {
2052         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2053         getThreadID(CGF, Loc),        // i32 <gtid>
2054         BufSize,                      // size_t <buf_size>
2055         CL.getPointer(),              // void *<copyprivate list>
2056         CpyFn,                        // void (*) (void *, void *) <copy_func>
2057         DidItVal                      // i32 did_it
2058     };
2059     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2060   }
2061 }
2062 
2063 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2064                                         const RegionCodeGenTy &OrderedOpGen,
2065                                         SourceLocation Loc, bool IsThreads) {
2066   if (!CGF.HaveInsertPoint())
2067     return;
2068   // __kmpc_ordered(ident_t *, gtid);
2069   // OrderedOpGen();
2070   // __kmpc_end_ordered(ident_t *, gtid);
2071   // Prepare arguments and build a call to __kmpc_ordered
2072   if (IsThreads) {
2073     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2074     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2075                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2076                           Args);
2077     OrderedOpGen.setAction(Action);
2078     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2079     return;
2080   }
2081   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2082 }
2083 
2084 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2085                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2086                                       bool ForceSimpleCall) {
2087   if (!CGF.HaveInsertPoint())
2088     return;
2089   // Build call __kmpc_cancel_barrier(loc, thread_id);
2090   // Build call __kmpc_barrier(loc, thread_id);
2091   unsigned Flags;
2092   if (Kind == OMPD_for)
2093     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2094   else if (Kind == OMPD_sections)
2095     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2096   else if (Kind == OMPD_single)
2097     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2098   else if (Kind == OMPD_barrier)
2099     Flags = OMP_IDENT_BARRIER_EXPL;
2100   else
2101     Flags = OMP_IDENT_BARRIER_IMPL;
2102   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2103   // thread_id);
2104   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2105                          getThreadID(CGF, Loc)};
2106   if (auto *OMPRegionInfo =
2107           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2108     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2109       auto *Result = CGF.EmitRuntimeCall(
2110           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2111       if (EmitChecks) {
2112         // if (__kmpc_cancel_barrier()) {
2113         //   exit from construct;
2114         // }
2115         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2116         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2117         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2118         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2119         CGF.EmitBlock(ExitBB);
2120         //   exit from construct;
2121         auto CancelDestination =
2122             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2123         CGF.EmitBranchThroughCleanup(CancelDestination);
2124         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2125       }
2126       return;
2127     }
2128   }
2129   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2130 }
2131 
2132 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2133 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2134                                           bool Chunked, bool Ordered) {
2135   switch (ScheduleKind) {
2136   case OMPC_SCHEDULE_static:
2137     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2138                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2139   case OMPC_SCHEDULE_dynamic:
2140     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2141   case OMPC_SCHEDULE_guided:
2142     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2143   case OMPC_SCHEDULE_runtime:
2144     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2145   case OMPC_SCHEDULE_auto:
2146     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2147   case OMPC_SCHEDULE_unknown:
2148     assert(!Chunked && "chunk was specified but schedule kind not known");
2149     return Ordered ? OMP_ord_static : OMP_sch_static;
2150   }
2151   llvm_unreachable("Unexpected runtime schedule");
2152 }
2153 
2154 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2155 static OpenMPSchedType
2156 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2157   // only static is allowed for dist_schedule
2158   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2159 }
2160 
2161 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2162                                          bool Chunked) const {
2163   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2164   return Schedule == OMP_sch_static;
2165 }
2166 
2167 bool CGOpenMPRuntime::isStaticNonchunked(
2168     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2169   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2170   return Schedule == OMP_dist_sch_static;
2171 }
2172 
2173 
2174 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2175   auto Schedule =
2176       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2177   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2178   return Schedule != OMP_sch_static;
2179 }
2180 
2181 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2182                                           SourceLocation Loc,
2183                                           OpenMPScheduleClauseKind ScheduleKind,
2184                                           unsigned IVSize, bool IVSigned,
2185                                           bool Ordered, llvm::Value *UB,
2186                                           llvm::Value *Chunk) {
2187   if (!CGF.HaveInsertPoint())
2188     return;
2189   OpenMPSchedType Schedule =
2190       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
2191   assert(Ordered ||
2192          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2193           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
2194   // Call __kmpc_dispatch_init(
2195   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2196   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2197   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2198 
2199   // If the Chunk was not specified in the clause - use default value 1.
2200   if (Chunk == nullptr)
2201     Chunk = CGF.Builder.getIntN(IVSize, 1);
2202   llvm::Value *Args[] = {
2203       emitUpdateLocation(CGF, Loc),
2204       getThreadID(CGF, Loc),
2205       CGF.Builder.getInt32(Schedule), // Schedule type
2206       CGF.Builder.getIntN(IVSize, 0), // Lower
2207       UB,                             // Upper
2208       CGF.Builder.getIntN(IVSize, 1), // Stride
2209       Chunk                           // Chunk
2210   };
2211   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2212 }
2213 
2214 static void emitForStaticInitCall(CodeGenFunction &CGF,
2215                                   SourceLocation Loc,
2216                                   llvm::Value * UpdateLocation,
2217                                   llvm::Value * ThreadId,
2218                                   llvm::Constant * ForStaticInitFunction,
2219                                   OpenMPSchedType Schedule,
2220                                   unsigned IVSize, bool IVSigned, bool Ordered,
2221                                   Address IL, Address LB, Address UB,
2222                                   Address ST, llvm::Value *Chunk) {
2223   if (!CGF.HaveInsertPoint())
2224      return;
2225 
2226    assert(!Ordered);
2227    assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2228           Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2229           Schedule == OMP_dist_sch_static ||
2230           Schedule == OMP_dist_sch_static_chunked);
2231 
2232    // Call __kmpc_for_static_init(
2233    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2234    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2235    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2236    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2237    if (Chunk == nullptr) {
2238      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2239              Schedule == OMP_dist_sch_static) &&
2240             "expected static non-chunked schedule");
2241      // If the Chunk was not specified in the clause - use default value 1.
2242        Chunk = CGF.Builder.getIntN(IVSize, 1);
2243    } else {
2244      assert((Schedule == OMP_sch_static_chunked ||
2245              Schedule == OMP_ord_static_chunked ||
2246              Schedule == OMP_dist_sch_static_chunked) &&
2247             "expected static chunked schedule");
2248    }
2249    llvm::Value *Args[] = {
2250      UpdateLocation,
2251      ThreadId,
2252      CGF.Builder.getInt32(Schedule), // Schedule type
2253      IL.getPointer(),                // &isLastIter
2254      LB.getPointer(),                // &LB
2255      UB.getPointer(),                // &UB
2256      ST.getPointer(),                // &Stride
2257      CGF.Builder.getIntN(IVSize, 1), // Incr
2258      Chunk                           // Chunk
2259    };
2260    CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2261 }
2262 
2263 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2264                                         SourceLocation Loc,
2265                                         OpenMPScheduleClauseKind ScheduleKind,
2266                                         unsigned IVSize, bool IVSigned,
2267                                         bool Ordered, Address IL, Address LB,
2268                                         Address UB, Address ST,
2269                                         llvm::Value *Chunk) {
2270   OpenMPSchedType ScheduleNum = getRuntimeSchedule(ScheduleKind, Chunk != nullptr,
2271                                                    Ordered);
2272   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2273   auto *ThreadId = getThreadID(CGF, Loc);
2274   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2275   emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
2276       ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
2277 }
2278 
2279 void CGOpenMPRuntime::emitDistributeStaticInit(CodeGenFunction &CGF,
2280     SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind,
2281     unsigned IVSize, bool IVSigned,
2282     bool Ordered, Address IL, Address LB,
2283     Address UB, Address ST,
2284     llvm::Value *Chunk) {
2285   OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2286   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2287   auto *ThreadId = getThreadID(CGF, Loc);
2288   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2289   emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
2290       ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
2291 }
2292 
2293 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2294                                           SourceLocation Loc) {
2295   if (!CGF.HaveInsertPoint())
2296     return;
2297   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2298   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2299   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2300                       Args);
2301 }
2302 
2303 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2304                                                  SourceLocation Loc,
2305                                                  unsigned IVSize,
2306                                                  bool IVSigned) {
2307   if (!CGF.HaveInsertPoint())
2308     return;
2309   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2312 }
2313 
2314 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2315                                           SourceLocation Loc, unsigned IVSize,
2316                                           bool IVSigned, Address IL,
2317                                           Address LB, Address UB,
2318                                           Address ST) {
2319   // Call __kmpc_dispatch_next(
2320   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2321   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2322   //          kmp_int[32|64] *p_stride);
2323   llvm::Value *Args[] = {
2324       emitUpdateLocation(CGF, Loc),
2325       getThreadID(CGF, Loc),
2326       IL.getPointer(), // &isLastIter
2327       LB.getPointer(), // &Lower
2328       UB.getPointer(), // &Upper
2329       ST.getPointer()  // &Stride
2330   };
2331   llvm::Value *Call =
2332       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2333   return CGF.EmitScalarConversion(
2334       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2335       CGF.getContext().BoolTy, Loc);
2336 }
2337 
2338 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2339                                            llvm::Value *NumThreads,
2340                                            SourceLocation Loc) {
2341   if (!CGF.HaveInsertPoint())
2342     return;
2343   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2344   llvm::Value *Args[] = {
2345       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2346       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2347   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2348                       Args);
2349 }
2350 
2351 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2352                                          OpenMPProcBindClauseKind ProcBind,
2353                                          SourceLocation Loc) {
2354   if (!CGF.HaveInsertPoint())
2355     return;
2356   // Constants for proc bind value accepted by the runtime.
2357   enum ProcBindTy {
2358     ProcBindFalse = 0,
2359     ProcBindTrue,
2360     ProcBindMaster,
2361     ProcBindClose,
2362     ProcBindSpread,
2363     ProcBindIntel,
2364     ProcBindDefault
2365   } RuntimeProcBind;
2366   switch (ProcBind) {
2367   case OMPC_PROC_BIND_master:
2368     RuntimeProcBind = ProcBindMaster;
2369     break;
2370   case OMPC_PROC_BIND_close:
2371     RuntimeProcBind = ProcBindClose;
2372     break;
2373   case OMPC_PROC_BIND_spread:
2374     RuntimeProcBind = ProcBindSpread;
2375     break;
2376   case OMPC_PROC_BIND_unknown:
2377     llvm_unreachable("Unsupported proc_bind value.");
2378   }
2379   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2380   llvm::Value *Args[] = {
2381       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2382       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2383   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2384 }
2385 
2386 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2387                                 SourceLocation Loc) {
2388   if (!CGF.HaveInsertPoint())
2389     return;
2390   // Build call void __kmpc_flush(ident_t *loc)
2391   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2392                       emitUpdateLocation(CGF, Loc));
2393 }
2394 
2395 namespace {
2396 /// \brief Indexes of fields for type kmp_task_t.
2397 enum KmpTaskTFields {
2398   /// \brief List of shared variables.
2399   KmpTaskTShareds,
2400   /// \brief Task routine.
2401   KmpTaskTRoutine,
2402   /// \brief Partition id for the untied tasks.
2403   KmpTaskTPartId,
2404   /// \brief Function with call of destructors for private variables.
2405   KmpTaskTDestructors,
2406 };
2407 } // anonymous namespace
2408 
2409 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2410   // FIXME: Add other entries type when they become supported.
2411   return OffloadEntriesTargetRegion.empty();
2412 }
2413 
2414 /// \brief Initialize target region entry.
2415 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2416     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2417                                     StringRef ParentName, unsigned LineNum,
2418                                     unsigned Order) {
2419   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2420                                              "only required for the device "
2421                                              "code generation.");
2422   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2423       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2424   ++OffloadingEntriesNum;
2425 }
2426 
2427 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2428     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2429                                   StringRef ParentName, unsigned LineNum,
2430                                   llvm::Constant *Addr, llvm::Constant *ID) {
2431   // If we are emitting code for a target, the entry is already initialized,
2432   // only has to be registered.
2433   if (CGM.getLangOpts().OpenMPIsDevice) {
2434     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2435            "Entry must exist.");
2436     auto &Entry =
2437         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2438     assert(Entry.isValid() && "Entry not initialized!");
2439     Entry.setAddress(Addr);
2440     Entry.setID(ID);
2441     return;
2442   } else {
2443     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2444     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2445   }
2446 }
2447 
2448 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2449     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2450     unsigned LineNum) const {
2451   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2452   if (PerDevice == OffloadEntriesTargetRegion.end())
2453     return false;
2454   auto PerFile = PerDevice->second.find(FileID);
2455   if (PerFile == PerDevice->second.end())
2456     return false;
2457   auto PerParentName = PerFile->second.find(ParentName);
2458   if (PerParentName == PerFile->second.end())
2459     return false;
2460   auto PerLine = PerParentName->second.find(LineNum);
2461   if (PerLine == PerParentName->second.end())
2462     return false;
2463   // Fail if this entry is already registered.
2464   if (PerLine->second.getAddress() || PerLine->second.getID())
2465     return false;
2466   return true;
2467 }
2468 
2469 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2470     const OffloadTargetRegionEntryInfoActTy &Action) {
2471   // Scan all target region entries and perform the provided action.
2472   for (auto &D : OffloadEntriesTargetRegion)
2473     for (auto &F : D.second)
2474       for (auto &P : F.second)
2475         for (auto &L : P.second)
2476           Action(D.first, F.first, P.first(), L.first, L.second);
2477 }
2478 
2479 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2480 /// \a Codegen. This is used to emit the two functions that register and
2481 /// unregister the descriptor of the current compilation unit.
2482 static llvm::Function *
2483 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2484                                          const RegionCodeGenTy &Codegen) {
2485   auto &C = CGM.getContext();
2486   FunctionArgList Args;
2487   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2488                              /*Id=*/nullptr, C.VoidPtrTy);
2489   Args.push_back(&DummyPtr);
2490 
2491   CodeGenFunction CGF(CGM);
2492   GlobalDecl();
2493   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2494   auto FTy = CGM.getTypes().GetFunctionType(FI);
2495   auto *Fn =
2496       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2497   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2498   Codegen(CGF);
2499   CGF.FinishFunction();
2500   return Fn;
2501 }
2502 
2503 llvm::Function *
2504 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2505 
2506   // If we don't have entries or if we are emitting code for the device, we
2507   // don't need to do anything.
2508   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2509     return nullptr;
2510 
2511   auto &M = CGM.getModule();
2512   auto &C = CGM.getContext();
2513 
2514   // Get list of devices we care about
2515   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2516 
2517   // We should be creating an offloading descriptor only if there are devices
2518   // specified.
2519   assert(!Devices.empty() && "No OpenMP offloading devices??");
2520 
2521   // Create the external variables that will point to the begin and end of the
2522   // host entries section. These will be defined by the linker.
2523   auto *OffloadEntryTy =
2524       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2525   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2526       M, OffloadEntryTy, /*isConstant=*/true,
2527       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2528       ".omp_offloading.entries_begin");
2529   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2530       M, OffloadEntryTy, /*isConstant=*/true,
2531       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2532       ".omp_offloading.entries_end");
2533 
2534   // Create all device images
2535   llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
2536   auto *DeviceImageTy = cast<llvm::StructType>(
2537       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2538 
2539   for (unsigned i = 0; i < Devices.size(); ++i) {
2540     StringRef T = Devices[i].getTriple();
2541     auto *ImgBegin = new llvm::GlobalVariable(
2542         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2543         /*Initializer=*/nullptr,
2544         Twine(".omp_offloading.img_start.") + Twine(T));
2545     auto *ImgEnd = new llvm::GlobalVariable(
2546         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2547         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2548 
2549     llvm::Constant *Dev =
2550         llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2551                                   HostEntriesBegin, HostEntriesEnd, nullptr);
2552     DeviceImagesEntires.push_back(Dev);
2553   }
2554 
2555   // Create device images global array.
2556   llvm::ArrayType *DeviceImagesInitTy =
2557       llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
2558   llvm::Constant *DeviceImagesInit =
2559       llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
2560 
2561   llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2562       M, DeviceImagesInitTy, /*isConstant=*/true,
2563       llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2564       ".omp_offloading.device_images");
2565   DeviceImages->setUnnamedAddr(true);
2566 
2567   // This is a Zero array to be used in the creation of the constant expressions
2568   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2569                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2570 
2571   // Create the target region descriptor.
2572   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2573       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2574   llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2575       BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2576       llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2577                                            Index),
2578       HostEntriesBegin, HostEntriesEnd, nullptr);
2579 
2580   auto *Desc = new llvm::GlobalVariable(
2581       M, BinaryDescriptorTy, /*isConstant=*/true,
2582       llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2583       ".omp_offloading.descriptor");
2584 
2585   // Emit code to register or unregister the descriptor at execution
2586   // startup or closing, respectively.
2587 
2588   // Create a variable to drive the registration and unregistration of the
2589   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2590   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2591   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2592                                 IdentInfo, C.CharTy);
2593 
2594   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2595       CGM, ".omp_offloading.descriptor_unreg",
2596       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2597         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2598                              Desc);
2599       });
2600   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2601       CGM, ".omp_offloading.descriptor_reg",
2602       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2603         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2604                              Desc);
2605         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2606       });
2607   return RegFn;
2608 }
2609 
2610 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2611                                          llvm::Constant *Addr, uint64_t Size) {
2612   StringRef Name = Addr->getName();
2613   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2614       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2615   llvm::LLVMContext &C = CGM.getModule().getContext();
2616   llvm::Module &M = CGM.getModule();
2617 
2618   // Make sure the address has the right type.
2619   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2620 
2621   // Create constant string with the name.
2622   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2623 
2624   llvm::GlobalVariable *Str =
2625       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2626                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2627                                ".omp_offloading.entry_name");
2628   Str->setUnnamedAddr(true);
2629   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2630 
2631   // Create the entry struct.
2632   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2633       TgtOffloadEntryType, AddrPtr, StrPtr,
2634       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2635   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2636       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2637       EntryInit, ".omp_offloading.entry");
2638 
2639   // The entry has to be created in the section the linker expects it to be.
2640   Entry->setSection(".omp_offloading.entries");
2641   // We can't have any padding between symbols, so we need to have 1-byte
2642   // alignment.
2643   Entry->setAlignment(1);
2644 }
2645 
2646 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2647   // Emit the offloading entries and metadata so that the device codegen side
2648   // can
2649   // easily figure out what to emit. The produced metadata looks like this:
2650   //
2651   // !omp_offload.info = !{!1, ...}
2652   //
2653   // Right now we only generate metadata for function that contain target
2654   // regions.
2655 
2656   // If we do not have entries, we dont need to do anything.
2657   if (OffloadEntriesInfoManager.empty())
2658     return;
2659 
2660   llvm::Module &M = CGM.getModule();
2661   llvm::LLVMContext &C = M.getContext();
2662   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2663       OrderedEntries(OffloadEntriesInfoManager.size());
2664 
2665   // Create the offloading info metadata node.
2666   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2667 
2668   // Auxiliar methods to create metadata values and strings.
2669   auto getMDInt = [&](unsigned v) {
2670     return llvm::ConstantAsMetadata::get(
2671         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2672   };
2673 
2674   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2675 
2676   // Create function that emits metadata for each target region entry;
2677   auto &&TargetRegionMetadataEmitter = [&](
2678       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2679       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2680     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2681     // Generate metadata for target regions. Each entry of this metadata
2682     // contains:
2683     // - Entry 0 -> Kind of this type of metadata (0).
2684     // - Entry 1 -> Device ID of the file where the entry was identified.
2685     // - Entry 2 -> File ID of the file where the entry was identified.
2686     // - Entry 3 -> Mangled name of the function where the entry was identified.
2687     // - Entry 4 -> Line in the file where the entry was identified.
2688     // - Entry 5 -> Order the entry was created.
2689     // The first element of the metadata node is the kind.
2690     Ops.push_back(getMDInt(E.getKind()));
2691     Ops.push_back(getMDInt(DeviceID));
2692     Ops.push_back(getMDInt(FileID));
2693     Ops.push_back(getMDString(ParentName));
2694     Ops.push_back(getMDInt(Line));
2695     Ops.push_back(getMDInt(E.getOrder()));
2696 
2697     // Save this entry in the right position of the ordered entries array.
2698     OrderedEntries[E.getOrder()] = &E;
2699 
2700     // Add metadata to the named metadata node.
2701     MD->addOperand(llvm::MDNode::get(C, Ops));
2702   };
2703 
2704   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2705       TargetRegionMetadataEmitter);
2706 
2707   for (auto *E : OrderedEntries) {
2708     assert(E && "All ordered entries must exist!");
2709     if (auto *CE =
2710             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2711                 E)) {
2712       assert(CE->getID() && CE->getAddress() &&
2713              "Entry ID and Addr are invalid!");
2714       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
2715     } else
2716       llvm_unreachable("Unsupported entry kind.");
2717   }
2718 }
2719 
2720 /// \brief Loads all the offload entries information from the host IR
2721 /// metadata.
2722 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
2723   // If we are in target mode, load the metadata from the host IR. This code has
2724   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
2725 
2726   if (!CGM.getLangOpts().OpenMPIsDevice)
2727     return;
2728 
2729   if (CGM.getLangOpts().OMPHostIRFile.empty())
2730     return;
2731 
2732   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
2733   if (Buf.getError())
2734     return;
2735 
2736   llvm::LLVMContext C;
2737   auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
2738 
2739   if (ME.getError())
2740     return;
2741 
2742   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
2743   if (!MD)
2744     return;
2745 
2746   for (auto I : MD->operands()) {
2747     llvm::MDNode *MN = cast<llvm::MDNode>(I);
2748 
2749     auto getMDInt = [&](unsigned Idx) {
2750       llvm::ConstantAsMetadata *V =
2751           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
2752       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
2753     };
2754 
2755     auto getMDString = [&](unsigned Idx) {
2756       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
2757       return V->getString();
2758     };
2759 
2760     switch (getMDInt(0)) {
2761     default:
2762       llvm_unreachable("Unexpected metadata!");
2763       break;
2764     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
2765         OFFLOAD_ENTRY_INFO_TARGET_REGION:
2766       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
2767           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
2768           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
2769           /*Order=*/getMDInt(5));
2770       break;
2771     }
2772   }
2773 }
2774 
2775 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2776   if (!KmpRoutineEntryPtrTy) {
2777     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2778     auto &C = CGM.getContext();
2779     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2780     FunctionProtoType::ExtProtoInfo EPI;
2781     KmpRoutineEntryPtrQTy = C.getPointerType(
2782         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2783     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2784   }
2785 }
2786 
2787 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
2788                                        QualType FieldTy) {
2789   auto *Field = FieldDecl::Create(
2790       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
2791       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
2792       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
2793   Field->setAccess(AS_public);
2794   DC->addDecl(Field);
2795   return Field;
2796 }
2797 
2798 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
2799 
2800   // Make sure the type of the entry is already created. This is the type we
2801   // have to create:
2802   // struct __tgt_offload_entry{
2803   //   void      *addr;       // Pointer to the offload entry info.
2804   //                          // (function or global)
2805   //   char      *name;       // Name of the function or global.
2806   //   size_t     size;       // Size of the entry info (0 if it a function).
2807   // };
2808   if (TgtOffloadEntryQTy.isNull()) {
2809     ASTContext &C = CGM.getContext();
2810     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
2811     RD->startDefinition();
2812     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2813     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
2814     addFieldToRecordDecl(C, RD, C.getSizeType());
2815     RD->completeDefinition();
2816     TgtOffloadEntryQTy = C.getRecordType(RD);
2817   }
2818   return TgtOffloadEntryQTy;
2819 }
2820 
2821 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
2822   // These are the types we need to build:
2823   // struct __tgt_device_image{
2824   // void   *ImageStart;       // Pointer to the target code start.
2825   // void   *ImageEnd;         // Pointer to the target code end.
2826   // // We also add the host entries to the device image, as it may be useful
2827   // // for the target runtime to have access to that information.
2828   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
2829   //                                       // the entries.
2830   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2831   //                                       // entries (non inclusive).
2832   // };
2833   if (TgtDeviceImageQTy.isNull()) {
2834     ASTContext &C = CGM.getContext();
2835     auto *RD = C.buildImplicitRecord("__tgt_device_image");
2836     RD->startDefinition();
2837     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2838     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2839     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2840     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2841     RD->completeDefinition();
2842     TgtDeviceImageQTy = C.getRecordType(RD);
2843   }
2844   return TgtDeviceImageQTy;
2845 }
2846 
2847 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
2848   // struct __tgt_bin_desc{
2849   //   int32_t              NumDevices;      // Number of devices supported.
2850   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
2851   //                                         // (one per device).
2852   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
2853   //                                         // entries.
2854   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2855   //                                         // entries (non inclusive).
2856   // };
2857   if (TgtBinaryDescriptorQTy.isNull()) {
2858     ASTContext &C = CGM.getContext();
2859     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
2860     RD->startDefinition();
2861     addFieldToRecordDecl(
2862         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
2863     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
2864     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2865     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2866     RD->completeDefinition();
2867     TgtBinaryDescriptorQTy = C.getRecordType(RD);
2868   }
2869   return TgtBinaryDescriptorQTy;
2870 }
2871 
2872 namespace {
2873 struct PrivateHelpersTy {
2874   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
2875                    const VarDecl *PrivateElemInit)
2876       : Original(Original), PrivateCopy(PrivateCopy),
2877         PrivateElemInit(PrivateElemInit) {}
2878   const VarDecl *Original;
2879   const VarDecl *PrivateCopy;
2880   const VarDecl *PrivateElemInit;
2881 };
2882 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2883 } // anonymous namespace
2884 
2885 static RecordDecl *
2886 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2887   if (!Privates.empty()) {
2888     auto &C = CGM.getContext();
2889     // Build struct .kmp_privates_t. {
2890     //         /*  private vars  */
2891     //       };
2892     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
2893     RD->startDefinition();
2894     for (auto &&Pair : Privates) {
2895       auto *VD = Pair.second.Original;
2896       auto Type = VD->getType();
2897       Type = Type.getNonReferenceType();
2898       auto *FD = addFieldToRecordDecl(C, RD, Type);
2899       if (VD->hasAttrs()) {
2900         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2901              E(VD->getAttrs().end());
2902              I != E; ++I)
2903           FD->addAttr(*I);
2904       }
2905     }
2906     RD->completeDefinition();
2907     return RD;
2908   }
2909   return nullptr;
2910 }
2911 
2912 static RecordDecl *
2913 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
2914                          QualType KmpRoutineEntryPointerQTy) {
2915   auto &C = CGM.getContext();
2916   // Build struct kmp_task_t {
2917   //         void *              shareds;
2918   //         kmp_routine_entry_t routine;
2919   //         kmp_int32           part_id;
2920   //         kmp_routine_entry_t destructors;
2921   //       };
2922   auto *RD = C.buildImplicitRecord("kmp_task_t");
2923   RD->startDefinition();
2924   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2925   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2926   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2927   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2928   RD->completeDefinition();
2929   return RD;
2930 }
2931 
2932 static RecordDecl *
2933 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2934                                      ArrayRef<PrivateDataTy> Privates) {
2935   auto &C = CGM.getContext();
2936   // Build struct kmp_task_t_with_privates {
2937   //         kmp_task_t task_data;
2938   //         .kmp_privates_t. privates;
2939   //       };
2940   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2941   RD->startDefinition();
2942   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2943   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
2944     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2945   }
2946   RD->completeDefinition();
2947   return RD;
2948 }
2949 
2950 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2951 /// argument.
2952 /// \code
2953 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2954 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2955 ///   tt->shareds);
2956 ///   return 0;
2957 /// }
2958 /// \endcode
2959 static llvm::Value *
2960 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2961                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2962                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2963                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2964                       llvm::Value *TaskPrivatesMap) {
2965   auto &C = CGM.getContext();
2966   FunctionArgList Args;
2967   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2968   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2969                                 /*Id=*/nullptr,
2970                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2971   Args.push_back(&GtidArg);
2972   Args.push_back(&TaskTypeArg);
2973   auto &TaskEntryFnInfo =
2974       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2975   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2976   auto *TaskEntry =
2977       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2978                              ".omp_task_entry.", &CGM.getModule());
2979   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
2980   CodeGenFunction CGF(CGM);
2981   CGF.disableDebugInfo();
2982   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2983 
2984   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2985   // tt->task_data.shareds);
2986   auto *GtidParam = CGF.EmitLoadOfScalar(
2987       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2988   LValue TDBase = CGF.EmitLoadOfPointerLValue(
2989       CGF.GetAddrOfLocalVar(&TaskTypeArg),
2990       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
2991   auto *KmpTaskTWithPrivatesQTyRD =
2992       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2993   LValue Base =
2994       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2995   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2996   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2997   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2998   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2999 
3000   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3001   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3002   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3003       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3004       CGF.ConvertTypeForMem(SharedsPtrTy));
3005 
3006   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3007   llvm::Value *PrivatesParam;
3008   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3009     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3010     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3011         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3012   } else {
3013     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3014   }
3015 
3016   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
3017                              TaskPrivatesMap, SharedsParam};
3018   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3019   CGF.EmitStoreThroughLValue(
3020       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3021       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3022   CGF.FinishFunction();
3023   return TaskEntry;
3024 }
3025 
3026 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3027                                             SourceLocation Loc,
3028                                             QualType KmpInt32Ty,
3029                                             QualType KmpTaskTWithPrivatesPtrQTy,
3030                                             QualType KmpTaskTWithPrivatesQTy) {
3031   auto &C = CGM.getContext();
3032   FunctionArgList Args;
3033   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3034   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3035                                 /*Id=*/nullptr,
3036                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3037   Args.push_back(&GtidArg);
3038   Args.push_back(&TaskTypeArg);
3039   FunctionType::ExtInfo Info;
3040   auto &DestructorFnInfo =
3041       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3042   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3043   auto *DestructorFn =
3044       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3045                              ".omp_task_destructor.", &CGM.getModule());
3046   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3047                                     DestructorFnInfo);
3048   CodeGenFunction CGF(CGM);
3049   CGF.disableDebugInfo();
3050   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3051                     Args);
3052 
3053   LValue Base = CGF.EmitLoadOfPointerLValue(
3054       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3055       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3056   auto *KmpTaskTWithPrivatesQTyRD =
3057       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3058   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3059   Base = CGF.EmitLValueForField(Base, *FI);
3060   for (auto *Field :
3061        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3062     if (auto DtorKind = Field->getType().isDestructedType()) {
3063       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3064       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3065     }
3066   }
3067   CGF.FinishFunction();
3068   return DestructorFn;
3069 }
3070 
3071 /// \brief Emit a privates mapping function for correct handling of private and
3072 /// firstprivate variables.
3073 /// \code
3074 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3075 /// **noalias priv1,...,  <tyn> **noalias privn) {
3076 ///   *priv1 = &.privates.priv1;
3077 ///   ...;
3078 ///   *privn = &.privates.privn;
3079 /// }
3080 /// \endcode
3081 static llvm::Value *
3082 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3083                                ArrayRef<const Expr *> PrivateVars,
3084                                ArrayRef<const Expr *> FirstprivateVars,
3085                                QualType PrivatesQTy,
3086                                ArrayRef<PrivateDataTy> Privates) {
3087   auto &C = CGM.getContext();
3088   FunctionArgList Args;
3089   ImplicitParamDecl TaskPrivatesArg(
3090       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3091       C.getPointerType(PrivatesQTy).withConst().withRestrict());
3092   Args.push_back(&TaskPrivatesArg);
3093   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3094   unsigned Counter = 1;
3095   for (auto *E: PrivateVars) {
3096     Args.push_back(ImplicitParamDecl::Create(
3097         C, /*DC=*/nullptr, Loc,
3098         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3099                             .withConst()
3100                             .withRestrict()));
3101     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3102     PrivateVarsPos[VD] = Counter;
3103     ++Counter;
3104   }
3105   for (auto *E : FirstprivateVars) {
3106     Args.push_back(ImplicitParamDecl::Create(
3107         C, /*DC=*/nullptr, Loc,
3108         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3109                             .withConst()
3110                             .withRestrict()));
3111     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3112     PrivateVarsPos[VD] = Counter;
3113     ++Counter;
3114   }
3115   auto &TaskPrivatesMapFnInfo =
3116       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3117   auto *TaskPrivatesMapTy =
3118       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3119   auto *TaskPrivatesMap = llvm::Function::Create(
3120       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3121       ".omp_task_privates_map.", &CGM.getModule());
3122   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3123                                     TaskPrivatesMapFnInfo);
3124   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3125   CodeGenFunction CGF(CGM);
3126   CGF.disableDebugInfo();
3127   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3128                     TaskPrivatesMapFnInfo, Args);
3129 
3130   // *privi = &.privates.privi;
3131   LValue Base = CGF.EmitLoadOfPointerLValue(
3132       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3133       TaskPrivatesArg.getType()->castAs<PointerType>());
3134   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3135   Counter = 0;
3136   for (auto *Field : PrivatesQTyRD->fields()) {
3137     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3138     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3139     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3140     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3141         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3142     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3143     ++Counter;
3144   }
3145   CGF.FinishFunction();
3146   return TaskPrivatesMap;
3147 }
3148 
3149 static int array_pod_sort_comparator(const PrivateDataTy *P1,
3150                                      const PrivateDataTy *P2) {
3151   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3152 }
3153 
3154 void CGOpenMPRuntime::emitTaskCall(
3155     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
3156     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
3157     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
3158     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
3159     ArrayRef<const Expr *> PrivateCopies,
3160     ArrayRef<const Expr *> FirstprivateVars,
3161     ArrayRef<const Expr *> FirstprivateCopies,
3162     ArrayRef<const Expr *> FirstprivateInits,
3163     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
3164   if (!CGF.HaveInsertPoint())
3165     return;
3166   auto &C = CGM.getContext();
3167   llvm::SmallVector<PrivateDataTy, 8> Privates;
3168   // Aggregate privates and sort them by the alignment.
3169   auto I = PrivateCopies.begin();
3170   for (auto *E : PrivateVars) {
3171     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3172     Privates.push_back(std::make_pair(
3173         C.getDeclAlign(VD),
3174         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3175                          /*PrivateElemInit=*/nullptr)));
3176     ++I;
3177   }
3178   I = FirstprivateCopies.begin();
3179   auto IElemInitRef = FirstprivateInits.begin();
3180   for (auto *E : FirstprivateVars) {
3181     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3182     Privates.push_back(std::make_pair(
3183         C.getDeclAlign(VD),
3184         PrivateHelpersTy(
3185             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3186             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
3187     ++I;
3188     ++IElemInitRef;
3189   }
3190   llvm::array_pod_sort(Privates.begin(), Privates.end(),
3191                        array_pod_sort_comparator);
3192   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3193   // Build type kmp_routine_entry_t (if not built yet).
3194   emitKmpRoutineEntryT(KmpInt32Ty);
3195   // Build type kmp_task_t (if not built yet).
3196   if (KmpTaskTQTy.isNull()) {
3197     KmpTaskTQTy = C.getRecordType(
3198         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
3199   }
3200   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3201   // Build particular struct kmp_task_t for the given task.
3202   auto *KmpTaskTWithPrivatesQTyRD =
3203       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3204   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3205   QualType KmpTaskTWithPrivatesPtrQTy =
3206       C.getPointerType(KmpTaskTWithPrivatesQTy);
3207   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3208   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
3209   auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3210   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3211 
3212   // Emit initial values for private copies (if any).
3213   llvm::Value *TaskPrivatesMap = nullptr;
3214   auto *TaskPrivatesMapTy =
3215       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
3216                 3)
3217           ->getType();
3218   if (!Privates.empty()) {
3219     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3220     TaskPrivatesMap = emitTaskPrivateMappingFunction(
3221         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
3222     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3223         TaskPrivatesMap, TaskPrivatesMapTy);
3224   } else {
3225     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3226         cast<llvm::PointerType>(TaskPrivatesMapTy));
3227   }
3228   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3229   // kmp_task_t *tt);
3230   auto *TaskEntry = emitProxyTaskFunction(
3231       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
3232       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
3233 
3234   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3235   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3236   // kmp_routine_entry_t *task_entry);
3237   // Task flags. Format is taken from
3238   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
3239   // description of kmp_tasking_flags struct.
3240   const unsigned TiedFlag = 0x1;
3241   const unsigned FinalFlag = 0x2;
3242   unsigned Flags = Tied ? TiedFlag : 0;
3243   auto *TaskFlags =
3244       Final.getPointer()
3245           ? CGF.Builder.CreateSelect(Final.getPointer(),
3246                                      CGF.Builder.getInt32(FinalFlag),
3247                                      CGF.Builder.getInt32(/*C=*/0))
3248           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
3249   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3250   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3251   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
3252                               getThreadID(CGF, Loc), TaskFlags,
3253                               KmpTaskTWithPrivatesTySize, SharedsSize,
3254                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3255                                   TaskEntry, KmpRoutineEntryPtrTy)};
3256   auto *NewTask = CGF.EmitRuntimeCall(
3257       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
3258   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3259       NewTask, KmpTaskTWithPrivatesPtrTy);
3260   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3261                                                KmpTaskTWithPrivatesQTy);
3262   LValue TDBase =
3263       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3264   // Fill the data in the resulting kmp_task_t record.
3265   // Copy shareds if there are any.
3266   Address KmpTaskSharedsPtr = Address::invalid();
3267   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3268     KmpTaskSharedsPtr =
3269         Address(CGF.EmitLoadOfScalar(
3270                     CGF.EmitLValueForField(
3271                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
3272                                            KmpTaskTShareds)),
3273                     Loc),
3274                 CGF.getNaturalTypeAlignment(SharedsTy));
3275     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
3276   }
3277   // Emit initial values for private copies (if any).
3278   bool NeedsCleanup = false;
3279   if (!Privates.empty()) {
3280     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3281     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
3282     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3283     LValue SharedsBase;
3284     if (!FirstprivateVars.empty()) {
3285       SharedsBase = CGF.MakeAddrLValue(
3286           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3287               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3288           SharedsTy);
3289     }
3290     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
3291         cast<CapturedStmt>(*D.getAssociatedStmt()));
3292     for (auto &&Pair : Privates) {
3293       auto *VD = Pair.second.PrivateCopy;
3294       auto *Init = VD->getAnyInitializer();
3295       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3296       if (Init) {
3297         if (auto *Elem = Pair.second.PrivateElemInit) {
3298           auto *OriginalVD = Pair.second.Original;
3299           auto *SharedField = CapturesInfo.lookup(OriginalVD);
3300           auto SharedRefLValue =
3301               CGF.EmitLValueForField(SharedsBase, SharedField);
3302           SharedRefLValue = CGF.MakeAddrLValue(
3303               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
3304               SharedRefLValue.getType(), AlignmentSource::Decl);
3305           QualType Type = OriginalVD->getType();
3306           if (Type->isArrayType()) {
3307             // Initialize firstprivate array.
3308             if (!isa<CXXConstructExpr>(Init) ||
3309                 CGF.isTrivialInitializer(Init)) {
3310               // Perform simple memcpy.
3311               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
3312                                       SharedRefLValue.getAddress(), Type);
3313             } else {
3314               // Initialize firstprivate array using element-by-element
3315               // intialization.
3316               CGF.EmitOMPAggregateAssign(
3317                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
3318                   Type, [&CGF, Elem, Init, &CapturesInfo](
3319                             Address DestElement, Address SrcElement) {
3320                     // Clean up any temporaries needed by the initialization.
3321                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
3322                     InitScope.addPrivate(Elem, [SrcElement]() -> Address {
3323                       return SrcElement;
3324                     });
3325                     (void)InitScope.Privatize();
3326                     // Emit initialization for single element.
3327                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3328                         CGF, &CapturesInfo);
3329                     CGF.EmitAnyExprToMem(Init, DestElement,
3330                                          Init->getType().getQualifiers(),
3331                                          /*IsInitializer=*/false);
3332                   });
3333             }
3334           } else {
3335             CodeGenFunction::OMPPrivateScope InitScope(CGF);
3336             InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
3337               return SharedRefLValue.getAddress();
3338             });
3339             (void)InitScope.Privatize();
3340             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3341             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3342                                /*capturedByInit=*/false);
3343           }
3344         } else {
3345           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3346         }
3347       }
3348       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
3349       ++FI;
3350     }
3351   }
3352   // Provide pointer to function with destructors for privates.
3353   llvm::Value *DestructorFn =
3354       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
3355                                              KmpTaskTWithPrivatesPtrQTy,
3356                                              KmpTaskTWithPrivatesQTy)
3357                    : llvm::ConstantPointerNull::get(
3358                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
3359   LValue Destructor = CGF.EmitLValueForField(
3360       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
3361   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3362                             DestructorFn, KmpRoutineEntryPtrTy),
3363                         Destructor);
3364 
3365   // Process list of dependences.
3366   Address DependenciesArray = Address::invalid();
3367   unsigned NumDependencies = Dependences.size();
3368   if (NumDependencies) {
3369     // Dependence kind for RTL.
3370     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
3371     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
3372     RecordDecl *KmpDependInfoRD;
3373     QualType FlagsTy =
3374         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
3375     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
3376     if (KmpDependInfoTy.isNull()) {
3377       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
3378       KmpDependInfoRD->startDefinition();
3379       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
3380       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
3381       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
3382       KmpDependInfoRD->completeDefinition();
3383       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
3384     } else {
3385       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
3386     }
3387     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
3388     // Define type kmp_depend_info[<Dependences.size()>];
3389     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
3390         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
3391         ArrayType::Normal, /*IndexTypeQuals=*/0);
3392     // kmp_depend_info[<Dependences.size()>] deps;
3393     DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
3394     for (unsigned i = 0; i < NumDependencies; ++i) {
3395       const Expr *E = Dependences[i].second;
3396       auto Addr = CGF.EmitLValue(E);
3397       llvm::Value *Size;
3398       QualType Ty = E->getType();
3399       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3400         LValue UpAddrLVal =
3401             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3402         llvm::Value *UpAddr =
3403             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3404         llvm::Value *LowIntPtr =
3405             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3406         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3407         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3408       } else
3409         Size = CGF.getTypeSize(Ty);
3410       auto Base = CGF.MakeAddrLValue(
3411           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3412           KmpDependInfoTy);
3413       // deps[i].base_addr = &<Dependences[i].second>;
3414       auto BaseAddrLVal = CGF.EmitLValueForField(
3415           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3416       CGF.EmitStoreOfScalar(
3417           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3418           BaseAddrLVal);
3419       // deps[i].len = sizeof(<Dependences[i].second>);
3420       auto LenLVal = CGF.EmitLValueForField(
3421           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3422       CGF.EmitStoreOfScalar(Size, LenLVal);
3423       // deps[i].flags = <Dependences[i].first>;
3424       RTLDependenceKindTy DepKind;
3425       switch (Dependences[i].first) {
3426       case OMPC_DEPEND_in:
3427         DepKind = DepIn;
3428         break;
3429       // Out and InOut dependencies must use the same code.
3430       case OMPC_DEPEND_out:
3431       case OMPC_DEPEND_inout:
3432         DepKind = DepInOut;
3433         break;
3434       case OMPC_DEPEND_source:
3435       case OMPC_DEPEND_sink:
3436       case OMPC_DEPEND_unknown:
3437         llvm_unreachable("Unknown task dependence type");
3438       }
3439       auto FlagsLVal = CGF.EmitLValueForField(
3440           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3441       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3442                             FlagsLVal);
3443     }
3444     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3445         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3446         CGF.VoidPtrTy);
3447   }
3448 
3449   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
3450   // libcall.
3451   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
3452   // *new_task);
3453   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3454   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3455   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3456   // list is not empty
3457   auto *ThreadID = getThreadID(CGF, Loc);
3458   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3459   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3460   llvm::Value *DepTaskArgs[7];
3461   if (NumDependencies) {
3462     DepTaskArgs[0] = UpLoc;
3463     DepTaskArgs[1] = ThreadID;
3464     DepTaskArgs[2] = NewTask;
3465     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3466     DepTaskArgs[4] = DependenciesArray.getPointer();
3467     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3468     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3469   }
3470   auto &&ThenCodeGen = [NumDependencies, &TaskArgs,
3471                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
3472     // TODO: add check for untied tasks.
3473     auto &RT = CGF.CGM.getOpenMPRuntime();
3474     if (NumDependencies) {
3475       CGF.EmitRuntimeCall(
3476           RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
3477           DepTaskArgs);
3478     } else {
3479       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task),
3480                           TaskArgs);
3481     }
3482   };
3483 
3484   llvm::Value *DepWaitTaskArgs[6];
3485   if (NumDependencies) {
3486     DepWaitTaskArgs[0] = UpLoc;
3487     DepWaitTaskArgs[1] = ThreadID;
3488     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
3489     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
3490     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
3491     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3492   }
3493   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
3494                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
3495                                                            PrePostActionTy &) {
3496     auto &RT = CGF.CGM.getOpenMPRuntime();
3497     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
3498     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
3499     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
3500     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
3501     // is specified.
3502     if (NumDependencies)
3503       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
3504                           DepWaitTaskArgs);
3505     // Call proxy_task_entry(gtid, new_task);
3506     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
3507         CodeGenFunction &CGF, PrePostActionTy &Action) {
3508       Action.Enter(CGF);
3509       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
3510       CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
3511     };
3512 
3513     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
3514     // kmp_task_t *new_task);
3515     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
3516     // kmp_task_t *new_task);
3517     RegionCodeGenTy RCG(CodeGen);
3518     CommonActionTy Action(
3519         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
3520         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
3521     RCG.setAction(Action);
3522     RCG(CGF);
3523   };
3524 
3525   if (IfCond)
3526     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
3527   else {
3528     RegionCodeGenTy ThenRCG(ThenCodeGen);
3529     ThenRCG(CGF);
3530   }
3531 }
3532 
3533 /// \brief Emit reduction operation for each element of array (required for
3534 /// array sections) LHS op = RHS.
3535 /// \param Type Type of array.
3536 /// \param LHSVar Variable on the left side of the reduction operation
3537 /// (references element of array in original variable).
3538 /// \param RHSVar Variable on the right side of the reduction operation
3539 /// (references element of array in original variable).
3540 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
3541 /// RHSVar.
3542 static void EmitOMPAggregateReduction(
3543     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
3544     const VarDecl *RHSVar,
3545     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
3546                                   const Expr *, const Expr *)> &RedOpGen,
3547     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
3548     const Expr *UpExpr = nullptr) {
3549   // Perform element-by-element initialization.
3550   QualType ElementTy;
3551   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
3552   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
3553 
3554   // Drill down to the base element type on both arrays.
3555   auto ArrayTy = Type->getAsArrayTypeUnsafe();
3556   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
3557 
3558   auto RHSBegin = RHSAddr.getPointer();
3559   auto LHSBegin = LHSAddr.getPointer();
3560   // Cast from pointer to array type to pointer to single element.
3561   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
3562   // The basic structure here is a while-do loop.
3563   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
3564   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
3565   auto IsEmpty =
3566       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
3567   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
3568 
3569   // Enter the loop body, making that address the current address.
3570   auto EntryBB = CGF.Builder.GetInsertBlock();
3571   CGF.EmitBlock(BodyBB);
3572 
3573   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
3574 
3575   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
3576       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
3577   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
3578   Address RHSElementCurrent =
3579       Address(RHSElementPHI,
3580               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3581 
3582   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
3583       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
3584   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
3585   Address LHSElementCurrent =
3586       Address(LHSElementPHI,
3587               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3588 
3589   // Emit copy.
3590   CodeGenFunction::OMPPrivateScope Scope(CGF);
3591   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
3592   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
3593   Scope.Privatize();
3594   RedOpGen(CGF, XExpr, EExpr, UpExpr);
3595   Scope.ForceCleanup();
3596 
3597   // Shift the address forward by one element.
3598   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
3599       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
3600   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
3601       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
3602   // Check whether we've reached the end.
3603   auto Done =
3604       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
3605   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
3606   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
3607   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
3608 
3609   // Done.
3610   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
3611 }
3612 
3613 /// Emit reduction combiner. If the combiner is a simple expression emit it as
3614 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
3615 /// UDR combiner function.
3616 static void emitReductionCombiner(CodeGenFunction &CGF,
3617                                   const Expr *ReductionOp) {
3618   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
3619     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
3620       if (auto *DRE =
3621               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
3622         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
3623           std::pair<llvm::Function *, llvm::Function *> Reduction =
3624               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
3625           RValue Func = RValue::get(Reduction.first);
3626           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
3627           CGF.EmitIgnoredExpr(ReductionOp);
3628           return;
3629         }
3630   CGF.EmitIgnoredExpr(ReductionOp);
3631 }
3632 
3633 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
3634                                           llvm::Type *ArgsType,
3635                                           ArrayRef<const Expr *> Privates,
3636                                           ArrayRef<const Expr *> LHSExprs,
3637                                           ArrayRef<const Expr *> RHSExprs,
3638                                           ArrayRef<const Expr *> ReductionOps) {
3639   auto &C = CGM.getContext();
3640 
3641   // void reduction_func(void *LHSArg, void *RHSArg);
3642   FunctionArgList Args;
3643   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3644                            C.VoidPtrTy);
3645   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3646                            C.VoidPtrTy);
3647   Args.push_back(&LHSArg);
3648   Args.push_back(&RHSArg);
3649   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3650   auto *Fn = llvm::Function::Create(
3651       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
3652       ".omp.reduction.reduction_func", &CGM.getModule());
3653   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
3654   CodeGenFunction CGF(CGM);
3655   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
3656 
3657   // Dst = (void*[n])(LHSArg);
3658   // Src = (void*[n])(RHSArg);
3659   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3660       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3661       ArgsType), CGF.getPointerAlign());
3662   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3663       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3664       ArgsType), CGF.getPointerAlign());
3665 
3666   //  ...
3667   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
3668   //  ...
3669   CodeGenFunction::OMPPrivateScope Scope(CGF);
3670   auto IPriv = Privates.begin();
3671   unsigned Idx = 0;
3672   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
3673     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
3674     Scope.addPrivate(RHSVar, [&]() -> Address {
3675       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
3676     });
3677     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
3678     Scope.addPrivate(LHSVar, [&]() -> Address {
3679       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
3680     });
3681     QualType PrivTy = (*IPriv)->getType();
3682     if (PrivTy->isVariablyModifiedType()) {
3683       // Get array size and emit VLA type.
3684       ++Idx;
3685       Address Elem =
3686           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
3687       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
3688       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
3689       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
3690       CodeGenFunction::OpaqueValueMapping OpaqueMap(
3691           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
3692       CGF.EmitVariablyModifiedType(PrivTy);
3693     }
3694   }
3695   Scope.Privatize();
3696   IPriv = Privates.begin();
3697   auto ILHS = LHSExprs.begin();
3698   auto IRHS = RHSExprs.begin();
3699   for (auto *E : ReductionOps) {
3700     if ((*IPriv)->getType()->isArrayType()) {
3701       // Emit reduction for array section.
3702       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3703       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3704       EmitOMPAggregateReduction(
3705           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3706           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
3707             emitReductionCombiner(CGF, E);
3708           });
3709     } else
3710       // Emit reduction for array subscript or single variable.
3711       emitReductionCombiner(CGF, E);
3712     ++IPriv;
3713     ++ILHS;
3714     ++IRHS;
3715   }
3716   Scope.ForceCleanup();
3717   CGF.FinishFunction();
3718   return Fn;
3719 }
3720 
3721 static void emitSingleReductionCombiner(CodeGenFunction &CGF,
3722                                         const Expr *ReductionOp,
3723                                         const Expr *PrivateRef,
3724                                         const DeclRefExpr *LHS,
3725                                         const DeclRefExpr *RHS) {
3726   if (PrivateRef->getType()->isArrayType()) {
3727     // Emit reduction for array section.
3728     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
3729     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
3730     EmitOMPAggregateReduction(
3731         CGF, PrivateRef->getType(), LHSVar, RHSVar,
3732         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
3733           emitReductionCombiner(CGF, ReductionOp);
3734         });
3735   } else
3736     // Emit reduction for array subscript or single variable.
3737     emitReductionCombiner(CGF, ReductionOp);
3738 }
3739 
3740 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
3741                                     ArrayRef<const Expr *> Privates,
3742                                     ArrayRef<const Expr *> LHSExprs,
3743                                     ArrayRef<const Expr *> RHSExprs,
3744                                     ArrayRef<const Expr *> ReductionOps,
3745                                     bool WithNowait, bool SimpleReduction) {
3746   if (!CGF.HaveInsertPoint())
3747     return;
3748   // Next code should be emitted for reduction:
3749   //
3750   // static kmp_critical_name lock = { 0 };
3751   //
3752   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
3753   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
3754   //  ...
3755   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
3756   //  *(Type<n>-1*)rhs[<n>-1]);
3757   // }
3758   //
3759   // ...
3760   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
3761   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3762   // RedList, reduce_func, &<lock>)) {
3763   // case 1:
3764   //  ...
3765   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3766   //  ...
3767   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3768   // break;
3769   // case 2:
3770   //  ...
3771   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3772   //  ...
3773   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
3774   // break;
3775   // default:;
3776   // }
3777   //
3778   // if SimpleReduction is true, only the next code is generated:
3779   //  ...
3780   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3781   //  ...
3782 
3783   auto &C = CGM.getContext();
3784 
3785   if (SimpleReduction) {
3786     CodeGenFunction::RunCleanupsScope Scope(CGF);
3787     auto IPriv = Privates.begin();
3788     auto ILHS = LHSExprs.begin();
3789     auto IRHS = RHSExprs.begin();
3790     for (auto *E : ReductionOps) {
3791       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
3792                                   cast<DeclRefExpr>(*IRHS));
3793       ++IPriv;
3794       ++ILHS;
3795       ++IRHS;
3796     }
3797     return;
3798   }
3799 
3800   // 1. Build a list of reduction variables.
3801   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
3802   auto Size = RHSExprs.size();
3803   for (auto *E : Privates) {
3804     if (E->getType()->isVariablyModifiedType())
3805       // Reserve place for array size.
3806       ++Size;
3807   }
3808   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
3809   QualType ReductionArrayTy =
3810       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3811                              /*IndexTypeQuals=*/0);
3812   Address ReductionList =
3813       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
3814   auto IPriv = Privates.begin();
3815   unsigned Idx = 0;
3816   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
3817     Address Elem =
3818       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
3819     CGF.Builder.CreateStore(
3820         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3821             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
3822         Elem);
3823     if ((*IPriv)->getType()->isVariablyModifiedType()) {
3824       // Store array size.
3825       ++Idx;
3826       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
3827                                              CGF.getPointerSize());
3828       llvm::Value *Size = CGF.Builder.CreateIntCast(
3829           CGF.getVLASize(
3830                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
3831               .first,
3832           CGF.SizeTy, /*isSigned=*/false);
3833       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
3834                               Elem);
3835     }
3836   }
3837 
3838   // 2. Emit reduce_func().
3839   auto *ReductionFn = emitReductionFunction(
3840       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
3841       LHSExprs, RHSExprs, ReductionOps);
3842 
3843   // 3. Create static kmp_critical_name lock = { 0 };
3844   auto *Lock = getCriticalRegionLock(".reduction");
3845 
3846   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3847   // RedList, reduce_func, &<lock>);
3848   auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
3849   auto *ThreadId = getThreadID(CGF, Loc);
3850   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
3851   auto *RL =
3852     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
3853                                                     CGF.VoidPtrTy);
3854   llvm::Value *Args[] = {
3855       IdentTLoc,                             // ident_t *<loc>
3856       ThreadId,                              // i32 <gtid>
3857       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
3858       ReductionArrayTySize,                  // size_type sizeof(RedList)
3859       RL,                                    // void *RedList
3860       ReductionFn, // void (*) (void *, void *) <reduce_func>
3861       Lock         // kmp_critical_name *&<lock>
3862   };
3863   auto Res = CGF.EmitRuntimeCall(
3864       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
3865                                        : OMPRTL__kmpc_reduce),
3866       Args);
3867 
3868   // 5. Build switch(res)
3869   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
3870   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
3871 
3872   // 6. Build case 1:
3873   //  ...
3874   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3875   //  ...
3876   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3877   // break;
3878   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
3879   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
3880   CGF.EmitBlock(Case1BB);
3881 
3882   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3883   llvm::Value *EndArgs[] = {
3884       IdentTLoc, // ident_t *<loc>
3885       ThreadId,  // i32 <gtid>
3886       Lock       // kmp_critical_name *&<lock>
3887   };
3888   auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
3889       CodeGenFunction &CGF, PrePostActionTy &Action) {
3890     auto IPriv = Privates.begin();
3891     auto ILHS = LHSExprs.begin();
3892     auto IRHS = RHSExprs.begin();
3893     for (auto *E : ReductionOps) {
3894       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
3895                                   cast<DeclRefExpr>(*IRHS));
3896       ++IPriv;
3897       ++ILHS;
3898       ++IRHS;
3899     }
3900   };
3901   RegionCodeGenTy RCG(CodeGen);
3902   CommonActionTy Action(
3903       nullptr, llvm::None,
3904       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
3905                                        : OMPRTL__kmpc_end_reduce),
3906       EndArgs);
3907   RCG.setAction(Action);
3908   RCG(CGF);
3909 
3910   CGF.EmitBranch(DefaultBB);
3911 
3912   // 7. Build case 2:
3913   //  ...
3914   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3915   //  ...
3916   // break;
3917   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
3918   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
3919   CGF.EmitBlock(Case2BB);
3920 
3921   auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
3922       CodeGenFunction &CGF, PrePostActionTy &Action) {
3923     auto ILHS = LHSExprs.begin();
3924     auto IRHS = RHSExprs.begin();
3925     auto IPriv = Privates.begin();
3926     for (auto *E : ReductionOps) {
3927       const Expr *XExpr = nullptr;
3928       const Expr *EExpr = nullptr;
3929       const Expr *UpExpr = nullptr;
3930       BinaryOperatorKind BO = BO_Comma;
3931       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
3932         if (BO->getOpcode() == BO_Assign) {
3933           XExpr = BO->getLHS();
3934           UpExpr = BO->getRHS();
3935         }
3936       }
3937       // Try to emit update expression as a simple atomic.
3938       auto *RHSExpr = UpExpr;
3939       if (RHSExpr) {
3940         // Analyze RHS part of the whole expression.
3941         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
3942                 RHSExpr->IgnoreParenImpCasts())) {
3943           // If this is a conditional operator, analyze its condition for
3944           // min/max reduction operator.
3945           RHSExpr = ACO->getCond();
3946         }
3947         if (auto *BORHS =
3948                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
3949           EExpr = BORHS->getRHS();
3950           BO = BORHS->getOpcode();
3951         }
3952       }
3953       if (XExpr) {
3954         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3955         auto &&AtomicRedGen = [BO, VD, IPriv,
3956                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
3957                                     const Expr *EExpr, const Expr *UpExpr) {
3958           LValue X = CGF.EmitLValue(XExpr);
3959           RValue E;
3960           if (EExpr)
3961             E = CGF.EmitAnyExpr(EExpr);
3962           CGF.EmitOMPAtomicSimpleUpdateExpr(
3963               X, E, BO, /*IsXLHSInRHSPart=*/true,
3964               llvm::AtomicOrdering::Monotonic, Loc,
3965               [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
3966                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3967                 PrivateScope.addPrivate(
3968                     VD, [&CGF, VD, XRValue, Loc]() -> Address {
3969                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3970                       CGF.emitOMPSimpleStore(
3971                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
3972                           VD->getType().getNonReferenceType(), Loc);
3973                       return LHSTemp;
3974                     });
3975                 (void)PrivateScope.Privatize();
3976                 return CGF.EmitAnyExpr(UpExpr);
3977               });
3978         };
3979         if ((*IPriv)->getType()->isArrayType()) {
3980           // Emit atomic reduction for array section.
3981           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3982           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3983                                     AtomicRedGen, XExpr, EExpr, UpExpr);
3984         } else
3985           // Emit atomic reduction for array subscript or single variable.
3986           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3987       } else {
3988         // Emit as a critical region.
3989         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
3990                                      const Expr *, const Expr *) {
3991           auto &RT = CGF.CGM.getOpenMPRuntime();
3992           RT.emitCriticalRegion(
3993               CGF, ".atomic_reduction",
3994               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
3995                 Action.Enter(CGF);
3996                 emitReductionCombiner(CGF, E);
3997               },
3998               Loc);
3999         };
4000         if ((*IPriv)->getType()->isArrayType()) {
4001           auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4002           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4003           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4004                                     CritRedGen);
4005         } else
4006           CritRedGen(CGF, nullptr, nullptr, nullptr);
4007       }
4008       ++ILHS;
4009       ++IRHS;
4010       ++IPriv;
4011     }
4012   };
4013   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
4014   if (!WithNowait) {
4015     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
4016     llvm::Value *EndArgs[] = {
4017         IdentTLoc, // ident_t *<loc>
4018         ThreadId,  // i32 <gtid>
4019         Lock       // kmp_critical_name *&<lock>
4020     };
4021     CommonActionTy Action(nullptr, llvm::None,
4022                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
4023                           EndArgs);
4024     AtomicRCG.setAction(Action);
4025     AtomicRCG(CGF);
4026   } else
4027     AtomicRCG(CGF);
4028 
4029   CGF.EmitBranch(DefaultBB);
4030   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
4031 }
4032 
4033 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4034                                        SourceLocation Loc) {
4035   if (!CGF.HaveInsertPoint())
4036     return;
4037   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4038   // global_tid);
4039   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4040   // Ignore return result until untied tasks are supported.
4041   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4042 }
4043 
4044 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4045                                            OpenMPDirectiveKind InnerKind,
4046                                            const RegionCodeGenTy &CodeGen,
4047                                            bool HasCancel) {
4048   if (!CGF.HaveInsertPoint())
4049     return;
4050   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4051   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4052 }
4053 
4054 namespace {
4055 enum RTCancelKind {
4056   CancelNoreq = 0,
4057   CancelParallel = 1,
4058   CancelLoop = 2,
4059   CancelSections = 3,
4060   CancelTaskgroup = 4
4061 };
4062 } // anonymous namespace
4063 
4064 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4065   RTCancelKind CancelKind = CancelNoreq;
4066   if (CancelRegion == OMPD_parallel)
4067     CancelKind = CancelParallel;
4068   else if (CancelRegion == OMPD_for)
4069     CancelKind = CancelLoop;
4070   else if (CancelRegion == OMPD_sections)
4071     CancelKind = CancelSections;
4072   else {
4073     assert(CancelRegion == OMPD_taskgroup);
4074     CancelKind = CancelTaskgroup;
4075   }
4076   return CancelKind;
4077 }
4078 
4079 void CGOpenMPRuntime::emitCancellationPointCall(
4080     CodeGenFunction &CGF, SourceLocation Loc,
4081     OpenMPDirectiveKind CancelRegion) {
4082   if (!CGF.HaveInsertPoint())
4083     return;
4084   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4085   // global_tid, kmp_int32 cncl_kind);
4086   if (auto *OMPRegionInfo =
4087           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4088     if (OMPRegionInfo->hasCancel()) {
4089       llvm::Value *Args[] = {
4090           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4091           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4092       // Ignore return result until untied tasks are supported.
4093       auto *Result = CGF.EmitRuntimeCall(
4094           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4095       // if (__kmpc_cancellationpoint()) {
4096       //  __kmpc_cancel_barrier();
4097       //   exit from construct;
4098       // }
4099       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4100       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4101       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4102       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4103       CGF.EmitBlock(ExitBB);
4104       // __kmpc_cancel_barrier();
4105       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4106       // exit from construct;
4107       auto CancelDest =
4108           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4109       CGF.EmitBranchThroughCleanup(CancelDest);
4110       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4111     }
4112   }
4113 }
4114 
4115 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4116                                      const Expr *IfCond,
4117                                      OpenMPDirectiveKind CancelRegion) {
4118   if (!CGF.HaveInsertPoint())
4119     return;
4120   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4121   // kmp_int32 cncl_kind);
4122   if (auto *OMPRegionInfo =
4123           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4124     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4125                                                         PrePostActionTy &) {
4126       auto &RT = CGF.CGM.getOpenMPRuntime();
4127       llvm::Value *Args[] = {
4128           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4129           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4130       // Ignore return result until untied tasks are supported.
4131       auto *Result = CGF.EmitRuntimeCall(
4132           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4133       // if (__kmpc_cancel()) {
4134       //  __kmpc_cancel_barrier();
4135       //   exit from construct;
4136       // }
4137       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4138       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4139       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4140       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4141       CGF.EmitBlock(ExitBB);
4142       // __kmpc_cancel_barrier();
4143       RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4144       // exit from construct;
4145       auto CancelDest =
4146           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4147       CGF.EmitBranchThroughCleanup(CancelDest);
4148       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4149     };
4150     if (IfCond)
4151       emitOMPIfClause(CGF, IfCond, ThenGen,
4152                       [](CodeGenFunction &, PrePostActionTy &) {});
4153     else {
4154       RegionCodeGenTy ThenRCG(ThenGen);
4155       ThenRCG(CGF);
4156     }
4157   }
4158 }
4159 
4160 /// \brief Obtain information that uniquely identifies a target entry. This
4161 /// consists of the file and device IDs as well as line number associated with
4162 /// the relevant entry source location.
4163 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4164                                      unsigned &DeviceID, unsigned &FileID,
4165                                      unsigned &LineNum) {
4166 
4167   auto &SM = C.getSourceManager();
4168 
4169   // The loc should be always valid and have a file ID (the user cannot use
4170   // #pragma directives in macros)
4171 
4172   assert(Loc.isValid() && "Source location is expected to be always valid.");
4173   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4174 
4175   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4176   assert(PLoc.isValid() && "Source location is expected to be always valid.");
4177 
4178   llvm::sys::fs::UniqueID ID;
4179   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4180     llvm_unreachable("Source file with target region no longer exists!");
4181 
4182   DeviceID = ID.getDevice();
4183   FileID = ID.getFile();
4184   LineNum = PLoc.getLine();
4185 }
4186 
4187 void CGOpenMPRuntime::emitTargetOutlinedFunction(
4188     const OMPExecutableDirective &D, StringRef ParentName,
4189     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4190     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4191   assert(!ParentName.empty() && "Invalid target region parent name!");
4192 
4193   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
4194                                    IsOffloadEntry, CodeGen);
4195 }
4196 
4197 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4198     const OMPExecutableDirective &D, StringRef ParentName,
4199     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4200     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4201   // Create a unique name for the entry function using the source location
4202   // information of the current target region. The name will be something like:
4203   //
4204   // __omp_offloading_DD_FFFF_PP_lBB
4205   //
4206   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4207   // mangled name of the function that encloses the target region and BB is the
4208   // line number of the target region.
4209 
4210   unsigned DeviceID;
4211   unsigned FileID;
4212   unsigned Line;
4213   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4214                            Line);
4215   SmallString<64> EntryFnName;
4216   {
4217     llvm::raw_svector_ostream OS(EntryFnName);
4218     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4219        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4220   }
4221 
4222   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4223 
4224   CodeGenFunction CGF(CGM, true);
4225   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4226   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4227 
4228   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
4229 
4230   // If this target outline function is not an offload entry, we don't need to
4231   // register it.
4232   if (!IsOffloadEntry)
4233     return;
4234 
4235   // The target region ID is used by the runtime library to identify the current
4236   // target region, so it only has to be unique and not necessarily point to
4237   // anything. It could be the pointer to the outlined function that implements
4238   // the target region, but we aren't using that so that the compiler doesn't
4239   // need to keep that, and could therefore inline the host function if proven
4240   // worthwhile during optimization. In the other hand, if emitting code for the
4241   // device, the ID has to be the function address so that it can retrieved from
4242   // the offloading entry and launched by the runtime library. We also mark the
4243   // outlined function to have external linkage in case we are emitting code for
4244   // the device, because these functions will be entry points to the device.
4245 
4246   if (CGM.getLangOpts().OpenMPIsDevice) {
4247     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4248     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4249   } else
4250     OutlinedFnID = new llvm::GlobalVariable(
4251         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4252         llvm::GlobalValue::PrivateLinkage,
4253         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4254 
4255   // Register the information for the entry associated with this target region.
4256   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4257       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4258 }
4259 
4260 /// \brief Emit the num_teams clause of an enclosed teams directive at the
4261 /// target region scope. If there is no teams directive associated with the
4262 /// target directive, or if there is no num_teams clause associated with the
4263 /// enclosed teams directive, return nullptr.
4264 static llvm::Value *
4265 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4266                                      CodeGenFunction &CGF,
4267                                      const OMPExecutableDirective &D) {
4268 
4269   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4270                                               "teams directive expected to be "
4271                                               "emitted only for the host!");
4272 
4273   // FIXME: For the moment we do not support combined directives with target and
4274   // teams, so we do not expect to get any num_teams clause in the provided
4275   // directive. Once we support that, this assertion can be replaced by the
4276   // actual emission of the clause expression.
4277   assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4278          "Not expecting clause in directive.");
4279 
4280   // If the current target region has a teams region enclosed, we need to get
4281   // the number of teams to pass to the runtime function call. This is done
4282   // by generating the expression in a inlined region. This is required because
4283   // the expression is captured in the enclosing target environment when the
4284   // teams directive is not combined with target.
4285 
4286   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4287 
4288   // FIXME: Accommodate other combined directives with teams when they become
4289   // available.
4290   if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
4291     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4292       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4293       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4294       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4295       return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4296                                        /*IsSigned=*/true);
4297     }
4298 
4299     // If we have an enclosed teams directive but no num_teams clause we use
4300     // the default value 0.
4301     return CGF.Builder.getInt32(0);
4302   }
4303 
4304   // No teams associated with the directive.
4305   return nullptr;
4306 }
4307 
4308 /// \brief Emit the thread_limit clause of an enclosed teams directive at the
4309 /// target region scope. If there is no teams directive associated with the
4310 /// target directive, or if there is no thread_limit clause associated with the
4311 /// enclosed teams directive, return nullptr.
4312 static llvm::Value *
4313 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4314                                         CodeGenFunction &CGF,
4315                                         const OMPExecutableDirective &D) {
4316 
4317   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4318                                               "teams directive expected to be "
4319                                               "emitted only for the host!");
4320 
4321   // FIXME: For the moment we do not support combined directives with target and
4322   // teams, so we do not expect to get any thread_limit clause in the provided
4323   // directive. Once we support that, this assertion can be replaced by the
4324   // actual emission of the clause expression.
4325   assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4326          "Not expecting clause in directive.");
4327 
4328   // If the current target region has a teams region enclosed, we need to get
4329   // the thread limit to pass to the runtime function call. This is done
4330   // by generating the expression in a inlined region. This is required because
4331   // the expression is captured in the enclosing target environment when the
4332   // teams directive is not combined with target.
4333 
4334   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4335 
4336   // FIXME: Accommodate other combined directives with teams when they become
4337   // available.
4338   if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
4339     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4340       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4341       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4342       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4343       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4344                                        /*IsSigned=*/true);
4345     }
4346 
4347     // If we have an enclosed teams directive but no thread_limit clause we use
4348     // the default value 0.
4349     return CGF.Builder.getInt32(0);
4350   }
4351 
4352   // No teams associated with the directive.
4353   return nullptr;
4354 }
4355 
4356 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
4357                                      const OMPExecutableDirective &D,
4358                                      llvm::Value *OutlinedFn,
4359                                      llvm::Value *OutlinedFnID,
4360                                      const Expr *IfCond, const Expr *Device,
4361                                      ArrayRef<llvm::Value *> CapturedVars) {
4362   if (!CGF.HaveInsertPoint())
4363     return;
4364   /// \brief Values for bit flags used to specify the mapping type for
4365   /// offloading.
4366   enum OpenMPOffloadMappingFlags {
4367     /// \brief Allocate memory on the device and move data from host to device.
4368     OMP_MAP_TO = 0x01,
4369     /// \brief Allocate memory on the device and move data from device to host.
4370     OMP_MAP_FROM = 0x02,
4371     /// \brief The element passed to the device is a pointer.
4372     OMP_MAP_PTR = 0x20,
4373     /// \brief Pass the element to the device by value.
4374     OMP_MAP_BYCOPY = 0x80,
4375   };
4376 
4377   enum OpenMPOffloadingReservedDeviceIDs {
4378     /// \brief Device ID if the device was not defined, runtime should get it
4379     /// from environment variables in the spec.
4380     OMP_DEVICEID_UNDEF = -1,
4381   };
4382 
4383   assert(OutlinedFn && "Invalid outlined function!");
4384 
4385   auto &Ctx = CGF.getContext();
4386 
4387   // Fill up the arrays with the all the captured variables.
4388   SmallVector<llvm::Value *, 16> BasePointers;
4389   SmallVector<llvm::Value *, 16> Pointers;
4390   SmallVector<llvm::Value *, 16> Sizes;
4391   SmallVector<unsigned, 16> MapTypes;
4392 
4393   bool hasVLACaptures = false;
4394 
4395   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4396   auto RI = CS.getCapturedRecordDecl()->field_begin();
4397   // auto II = CS.capture_init_begin();
4398   auto CV = CapturedVars.begin();
4399   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
4400                                             CE = CS.capture_end();
4401        CI != CE; ++CI, ++RI, ++CV) {
4402     StringRef Name;
4403     QualType Ty;
4404     llvm::Value *BasePointer;
4405     llvm::Value *Pointer;
4406     llvm::Value *Size;
4407     unsigned MapType;
4408 
4409     // VLA sizes are passed to the outlined region by copy.
4410     if (CI->capturesVariableArrayType()) {
4411       BasePointer = Pointer = *CV;
4412       Size = CGF.getTypeSize(RI->getType());
4413       // Copy to the device as an argument. No need to retrieve it.
4414       MapType = OMP_MAP_BYCOPY;
4415       hasVLACaptures = true;
4416     } else if (CI->capturesThis()) {
4417       BasePointer = Pointer = *CV;
4418       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
4419       Size = CGF.getTypeSize(PtrTy->getPointeeType());
4420       // Default map type.
4421       MapType = OMP_MAP_TO | OMP_MAP_FROM;
4422     } else if (CI->capturesVariableByCopy()) {
4423       MapType = OMP_MAP_BYCOPY;
4424       if (!RI->getType()->isAnyPointerType()) {
4425         // If the field is not a pointer, we need to save the actual value and
4426         // load it as a void pointer.
4427         auto DstAddr = CGF.CreateMemTemp(
4428             Ctx.getUIntPtrType(),
4429             Twine(CI->getCapturedVar()->getName()) + ".casted");
4430         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
4431 
4432         auto *SrcAddrVal = CGF.EmitScalarConversion(
4433             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
4434             Ctx.getPointerType(RI->getType()), SourceLocation());
4435         LValue SrcLV =
4436             CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
4437 
4438         // Store the value using the source type pointer.
4439         CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
4440 
4441         // Load the value using the destination type pointer.
4442         BasePointer = Pointer =
4443             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
4444       } else {
4445         MapType |= OMP_MAP_PTR;
4446         BasePointer = Pointer = *CV;
4447       }
4448       Size = CGF.getTypeSize(RI->getType());
4449     } else {
4450       assert(CI->capturesVariable() && "Expected captured reference.");
4451       BasePointer = Pointer = *CV;
4452 
4453       const ReferenceType *PtrTy =
4454           cast<ReferenceType>(RI->getType().getTypePtr());
4455       QualType ElementType = PtrTy->getPointeeType();
4456       Size = CGF.getTypeSize(ElementType);
4457       // The default map type for a scalar/complex type is 'to' because by
4458       // default the value doesn't have to be retrieved. For an aggregate type,
4459       // the default is 'tofrom'.
4460       MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
4461                                                : OMP_MAP_TO;
4462       if (ElementType->isAnyPointerType())
4463         MapType |= OMP_MAP_PTR;
4464     }
4465 
4466     BasePointers.push_back(BasePointer);
4467     Pointers.push_back(Pointer);
4468     Sizes.push_back(Size);
4469     MapTypes.push_back(MapType);
4470   }
4471 
4472   // Keep track on whether the host function has to be executed.
4473   auto OffloadErrorQType =
4474       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
4475   auto OffloadError = CGF.MakeAddrLValue(
4476       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
4477       OffloadErrorQType);
4478   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
4479                         OffloadError);
4480 
4481   // Fill up the pointer arrays and transfer execution to the device.
4482   auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
4483                     hasVLACaptures, Device, OutlinedFnID, OffloadError,
4484                     OffloadErrorQType,
4485                     &D](CodeGenFunction &CGF, PrePostActionTy &) {
4486     auto &RT = CGF.CGM.getOpenMPRuntime();
4487     unsigned PointerNumVal = BasePointers.size();
4488     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
4489     llvm::Value *BasePointersArray;
4490     llvm::Value *PointersArray;
4491     llvm::Value *SizesArray;
4492     llvm::Value *MapTypesArray;
4493 
4494     if (PointerNumVal) {
4495       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
4496       QualType PointerArrayType = Ctx.getConstantArrayType(
4497           Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
4498           /*IndexTypeQuals=*/0);
4499 
4500       BasePointersArray =
4501           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
4502       PointersArray =
4503           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
4504 
4505       // If we don't have any VLA types, we can use a constant array for the map
4506       // sizes, otherwise we need to fill up the arrays as we do for the
4507       // pointers.
4508       if (hasVLACaptures) {
4509         QualType SizeArrayType = Ctx.getConstantArrayType(
4510             Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
4511             /*IndexTypeQuals=*/0);
4512         SizesArray =
4513             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
4514       } else {
4515         // We expect all the sizes to be constant, so we collect them to create
4516         // a constant array.
4517         SmallVector<llvm::Constant *, 16> ConstSizes;
4518         for (auto S : Sizes)
4519           ConstSizes.push_back(cast<llvm::Constant>(S));
4520 
4521         auto *SizesArrayInit = llvm::ConstantArray::get(
4522             llvm::ArrayType::get(CGF.CGM.SizeTy, ConstSizes.size()),
4523             ConstSizes);
4524         auto *SizesArrayGbl = new llvm::GlobalVariable(
4525             CGF.CGM.getModule(), SizesArrayInit->getType(),
4526             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
4527             SizesArrayInit, ".offload_sizes");
4528         SizesArrayGbl->setUnnamedAddr(true);
4529         SizesArray = SizesArrayGbl;
4530       }
4531 
4532       // The map types are always constant so we don't need to generate code to
4533       // fill arrays. Instead, we create an array constant.
4534       llvm::Constant *MapTypesArrayInit =
4535           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
4536       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
4537           CGF.CGM.getModule(), MapTypesArrayInit->getType(),
4538           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
4539           MapTypesArrayInit, ".offload_maptypes");
4540       MapTypesArrayGbl->setUnnamedAddr(true);
4541       MapTypesArray = MapTypesArrayGbl;
4542 
4543       for (unsigned i = 0; i < PointerNumVal; ++i) {
4544         llvm::Value *BPVal = BasePointers[i];
4545         if (BPVal->getType()->isPointerTy())
4546           BPVal = CGF.Builder.CreateBitCast(BPVal, CGF.VoidPtrTy);
4547         else {
4548           assert(BPVal->getType()->isIntegerTy() &&
4549                  "If not a pointer, the value type must be an integer.");
4550           BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGF.VoidPtrTy);
4551         }
4552         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
4553             llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal),
4554             BasePointersArray, 0, i);
4555         Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
4556         CGF.Builder.CreateStore(BPVal, BPAddr);
4557 
4558         llvm::Value *PVal = Pointers[i];
4559         if (PVal->getType()->isPointerTy())
4560           PVal = CGF.Builder.CreateBitCast(PVal, CGF.VoidPtrTy);
4561         else {
4562           assert(PVal->getType()->isIntegerTy() &&
4563                  "If not a pointer, the value type must be an integer.");
4564           PVal = CGF.Builder.CreateIntToPtr(PVal, CGF.VoidPtrTy);
4565         }
4566         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
4567             llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray,
4568             0, i);
4569         Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
4570         CGF.Builder.CreateStore(PVal, PAddr);
4571 
4572         if (hasVLACaptures) {
4573           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
4574               llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray,
4575               /*Idx0=*/0,
4576               /*Idx1=*/i);
4577           Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
4578           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
4579                                       Sizes[i], CGF.SizeTy, /*isSigned=*/true),
4580                                   SAddr);
4581         }
4582       }
4583 
4584       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4585           llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), BasePointersArray,
4586           /*Idx0=*/0, /*Idx1=*/0);
4587       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4588           llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray,
4589           /*Idx0=*/0,
4590           /*Idx1=*/0);
4591       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4592           llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray,
4593           /*Idx0=*/0, /*Idx1=*/0);
4594       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4595           llvm::ArrayType::get(CGF.Int32Ty, PointerNumVal), MapTypesArray,
4596           /*Idx0=*/0,
4597           /*Idx1=*/0);
4598 
4599     } else {
4600       BasePointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy);
4601       PointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy);
4602       SizesArray = llvm::ConstantPointerNull::get(CGF.SizeTy->getPointerTo());
4603       MapTypesArray =
4604           llvm::ConstantPointerNull::get(CGF.Int32Ty->getPointerTo());
4605     }
4606 
4607     // On top of the arrays that were filled up, the target offloading call
4608     // takes as arguments the device id as well as the host pointer. The host
4609     // pointer is used by the runtime library to identify the current target
4610     // region, so it only has to be unique and not necessarily point to
4611     // anything. It could be the pointer to the outlined function that
4612     // implements the target region, but we aren't using that so that the
4613     // compiler doesn't need to keep that, and could therefore inline the host
4614     // function if proven worthwhile during optimization.
4615 
4616     // From this point on, we need to have an ID of the target region defined.
4617     assert(OutlinedFnID && "Invalid outlined function ID!");
4618 
4619     // Emit device ID if any.
4620     llvm::Value *DeviceID;
4621     if (Device)
4622       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4623                                            CGF.Int32Ty, /*isSigned=*/true);
4624     else
4625       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
4626 
4627     // Return value of the runtime offloading call.
4628     llvm::Value *Return;
4629 
4630     auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
4631     auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
4632 
4633     // If we have NumTeams defined this means that we have an enclosed teams
4634     // region. Therefore we also expect to have ThreadLimit defined. These two
4635     // values should be defined in the presence of a teams directive, regardless
4636     // of having any clauses associated. If the user is using teams but no
4637     // clauses, these two values will be the default that should be passed to
4638     // the runtime library - a 32-bit integer with the value zero.
4639     if (NumTeams) {
4640       assert(ThreadLimit && "Thread limit expression should be available along "
4641                             "with number of teams.");
4642       llvm::Value *OffloadingArgs[] = {
4643           DeviceID,          OutlinedFnID,  PointerNum,
4644           BasePointersArray, PointersArray, SizesArray,
4645           MapTypesArray,     NumTeams,      ThreadLimit};
4646       Return = CGF.EmitRuntimeCall(
4647           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
4648     } else {
4649       llvm::Value *OffloadingArgs[] = {
4650           DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
4651           PointersArray, SizesArray,   MapTypesArray};
4652       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
4653                                    OffloadingArgs);
4654     }
4655 
4656     CGF.EmitStoreOfScalar(Return, OffloadError);
4657   };
4658 
4659   // Notify that the host version must be executed.
4660   auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
4661     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
4662                           OffloadError);
4663   };
4664 
4665   // If we have a target function ID it means that we need to support
4666   // offloading, otherwise, just execute on the host. We need to execute on host
4667   // regardless of the conditional in the if clause if, e.g., the user do not
4668   // specify target triples.
4669   if (OutlinedFnID) {
4670     if (IfCond)
4671       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
4672     else {
4673       RegionCodeGenTy ThenRCG(ThenGen);
4674       ThenRCG(CGF);
4675     }
4676   } else {
4677     RegionCodeGenTy ElseRCG(ElseGen);
4678     ElseRCG(CGF);
4679   }
4680 
4681   // Check the error code and execute the host version if required.
4682   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
4683   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
4684   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
4685   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
4686   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
4687 
4688   CGF.EmitBlock(OffloadFailedBlock);
4689   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
4690   CGF.EmitBranch(OffloadContBlock);
4691 
4692   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
4693 }
4694 
4695 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
4696                                                     StringRef ParentName) {
4697   if (!S)
4698     return;
4699 
4700   // If we find a OMP target directive, codegen the outline function and
4701   // register the result.
4702   // FIXME: Add other directives with target when they become supported.
4703   bool isTargetDirective = isa<OMPTargetDirective>(S);
4704 
4705   if (isTargetDirective) {
4706     auto *E = cast<OMPExecutableDirective>(S);
4707     unsigned DeviceID;
4708     unsigned FileID;
4709     unsigned Line;
4710     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
4711                              FileID, Line);
4712 
4713     // Is this a target region that should not be emitted as an entry point? If
4714     // so just signal we are done with this target region.
4715     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
4716                                                             ParentName, Line))
4717       return;
4718 
4719     llvm::Function *Fn;
4720     llvm::Constant *Addr;
4721     std::tie(Fn, Addr) =
4722         CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
4723             CGM, cast<OMPTargetDirective>(*E), ParentName,
4724             /*isOffloadEntry=*/true);
4725     assert(Fn && Addr && "Target region emission failed.");
4726     return;
4727   }
4728 
4729   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
4730     if (!E->getAssociatedStmt())
4731       return;
4732 
4733     scanForTargetRegionsFunctions(
4734         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
4735         ParentName);
4736     return;
4737   }
4738 
4739   // If this is a lambda function, look into its body.
4740   if (auto *L = dyn_cast<LambdaExpr>(S))
4741     S = L->getBody();
4742 
4743   // Keep looking for target regions recursively.
4744   for (auto *II : S->children())
4745     scanForTargetRegionsFunctions(II, ParentName);
4746 }
4747 
4748 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
4749   auto &FD = *cast<FunctionDecl>(GD.getDecl());
4750 
4751   // If emitting code for the host, we do not process FD here. Instead we do
4752   // the normal code generation.
4753   if (!CGM.getLangOpts().OpenMPIsDevice)
4754     return false;
4755 
4756   // Try to detect target regions in the function.
4757   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
4758 
4759   // We should not emit any function othen that the ones created during the
4760   // scanning. Therefore, we signal that this function is completely dealt
4761   // with.
4762   return true;
4763 }
4764 
4765 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
4766   if (!CGM.getLangOpts().OpenMPIsDevice)
4767     return false;
4768 
4769   // Check if there are Ctors/Dtors in this declaration and look for target
4770   // regions in it. We use the complete variant to produce the kernel name
4771   // mangling.
4772   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
4773   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
4774     for (auto *Ctor : RD->ctors()) {
4775       StringRef ParentName =
4776           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
4777       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
4778     }
4779     auto *Dtor = RD->getDestructor();
4780     if (Dtor) {
4781       StringRef ParentName =
4782           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
4783       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
4784     }
4785   }
4786 
4787   // If we are in target mode we do not emit any global (declare target is not
4788   // implemented yet). Therefore we signal that GD was processed in this case.
4789   return true;
4790 }
4791 
4792 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
4793   auto *VD = GD.getDecl();
4794   if (isa<FunctionDecl>(VD))
4795     return emitTargetFunctions(GD);
4796 
4797   return emitTargetGlobalVariable(GD);
4798 }
4799 
4800 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
4801   // If we have offloading in the current module, we need to emit the entries
4802   // now and register the offloading descriptor.
4803   createOffloadEntriesAndInfoMetadata();
4804 
4805   // Create and register the offloading binary descriptors. This is the main
4806   // entity that captures all the information about offloading in the current
4807   // compilation unit.
4808   return createOffloadingBinaryDescriptorRegistration();
4809 }
4810 
4811 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
4812                                     const OMPExecutableDirective &D,
4813                                     SourceLocation Loc,
4814                                     llvm::Value *OutlinedFn,
4815                                     ArrayRef<llvm::Value *> CapturedVars) {
4816   if (!CGF.HaveInsertPoint())
4817     return;
4818 
4819   auto *RTLoc = emitUpdateLocation(CGF, Loc);
4820   CodeGenFunction::RunCleanupsScope Scope(CGF);
4821 
4822   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
4823   llvm::Value *Args[] = {
4824       RTLoc,
4825       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
4826       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
4827   llvm::SmallVector<llvm::Value *, 16> RealArgs;
4828   RealArgs.append(std::begin(Args), std::end(Args));
4829   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
4830 
4831   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
4832   CGF.EmitRuntimeCall(RTLFn, RealArgs);
4833 }
4834 
4835 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
4836                                          const Expr *NumTeams,
4837                                          const Expr *ThreadLimit,
4838                                          SourceLocation Loc) {
4839   if (!CGF.HaveInsertPoint())
4840     return;
4841 
4842   auto *RTLoc = emitUpdateLocation(CGF, Loc);
4843 
4844   llvm::Value *NumTeamsVal =
4845       (NumTeams)
4846           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
4847                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
4848           : CGF.Builder.getInt32(0);
4849 
4850   llvm::Value *ThreadLimitVal =
4851       (ThreadLimit)
4852           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
4853                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
4854           : CGF.Builder.getInt32(0);
4855 
4856   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
4857   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
4858                                      ThreadLimitVal};
4859   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
4860                       PushNumTeamsArgs);
4861 }
4862