1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitmaskEnum.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/CallSite.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/Value.h"
28 #include "llvm/Support/Format.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 
35 namespace {
36 /// \brief Base class for handling code generation inside OpenMP regions.
37 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
38 public:
39   /// \brief Kinds of OpenMP regions used in codegen.
40   enum CGOpenMPRegionKind {
41     /// \brief Region with outlined function for standalone 'parallel'
42     /// directive.
43     ParallelOutlinedRegion,
44     /// \brief Region with outlined function for standalone 'task' directive.
45     TaskOutlinedRegion,
46     /// \brief Region for constructs that do not require function outlining,
47     /// like 'for', 'sections', 'atomic' etc. directives.
48     InlinedRegion,
49     /// \brief Region with outlined function for standalone 'target' directive.
50     TargetRegion,
51   };
52 
53   CGOpenMPRegionInfo(const CapturedStmt &CS,
54                      const CGOpenMPRegionKind RegionKind,
55                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
56                      bool HasCancel)
57       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
58         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
59 
60   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
61                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
62                      bool HasCancel)
63       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
64         Kind(Kind), HasCancel(HasCancel) {}
65 
66   /// \brief Get a variable or parameter for storing global thread id
67   /// inside OpenMP construct.
68   virtual const VarDecl *getThreadIDVariable() const = 0;
69 
70   /// \brief Emit the captured statement body.
71   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
72 
73   /// \brief Get an LValue for the current ThreadID variable.
74   /// \return LValue for thread id variable. This LValue always has type int32*.
75   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
76 
77   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
78 
79   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
80 
81   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
82 
83   bool hasCancel() const { return HasCancel; }
84 
85   static bool classof(const CGCapturedStmtInfo *Info) {
86     return Info->getKind() == CR_OpenMP;
87   }
88 
89   ~CGOpenMPRegionInfo() override = default;
90 
91 protected:
92   CGOpenMPRegionKind RegionKind;
93   RegionCodeGenTy CodeGen;
94   OpenMPDirectiveKind Kind;
95   bool HasCancel;
96 };
97 
98 /// \brief API for captured statement code generation in OpenMP constructs.
99 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
100 public:
101   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
102                              const RegionCodeGenTy &CodeGen,
103                              OpenMPDirectiveKind Kind, bool HasCancel,
104                              StringRef HelperName)
105       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
106                            HasCancel),
107         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
108     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
109   }
110 
111   /// \brief Get a variable or parameter for storing global thread id
112   /// inside OpenMP construct.
113   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
114 
115   /// \brief Get the name of the capture helper.
116   StringRef getHelperName() const override { return HelperName; }
117 
118   static bool classof(const CGCapturedStmtInfo *Info) {
119     return CGOpenMPRegionInfo::classof(Info) &&
120            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
121                ParallelOutlinedRegion;
122   }
123 
124 private:
125   /// \brief A variable or parameter storing global thread id for OpenMP
126   /// constructs.
127   const VarDecl *ThreadIDVar;
128   StringRef HelperName;
129 };
130 
131 /// \brief API for captured statement code generation in OpenMP constructs.
132 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
133 public:
134   class UntiedTaskActionTy final : public PrePostActionTy {
135     bool Untied;
136     const VarDecl *PartIDVar;
137     const RegionCodeGenTy UntiedCodeGen;
138     llvm::SwitchInst *UntiedSwitch = nullptr;
139 
140   public:
141     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
142                        const RegionCodeGenTy &UntiedCodeGen)
143         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
144     void Enter(CodeGenFunction &CGF) override {
145       if (Untied) {
146         // Emit task switching point.
147         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
148             CGF.GetAddrOfLocalVar(PartIDVar),
149             PartIDVar->getType()->castAs<PointerType>());
150         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
151         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// \brief Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// \brief Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// \brief Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// \brief A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// \brief API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // \brief Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// \brief Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// \brief Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// \brief Get the name of the capture helper.
269   StringRef getHelperName() const override {
270     if (auto *OuterRegionInfo = getOldCSI())
271       return OuterRegionInfo->getHelperName();
272     llvm_unreachable("No helper name for inlined OpenMP construct");
273   }
274 
275   void emitUntiedSwitch(CodeGenFunction &CGF) override {
276     if (OuterRegionInfo)
277       OuterRegionInfo->emitUntiedSwitch(CGF);
278   }
279 
280   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
281 
282   static bool classof(const CGCapturedStmtInfo *Info) {
283     return CGOpenMPRegionInfo::classof(Info) &&
284            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
285   }
286 
287   ~CGOpenMPInlinedRegionInfo() override = default;
288 
289 private:
290   /// \brief CodeGen info about outer OpenMP region.
291   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
292   CGOpenMPRegionInfo *OuterRegionInfo;
293 };
294 
295 /// \brief API for captured statement code generation in OpenMP target
296 /// constructs. For this captures, implicit parameters are used instead of the
297 /// captured fields. The name of the target region has to be unique in a given
298 /// application so it is provided by the client, because only the client has
299 /// the information to generate that.
300 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
301 public:
302   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
303                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
304       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
305                            /*HasCancel=*/false),
306         HelperName(HelperName) {}
307 
308   /// \brief This is unused for target regions because each starts executing
309   /// with a single thread.
310   const VarDecl *getThreadIDVariable() const override { return nullptr; }
311 
312   /// \brief Get the name of the capture helper.
313   StringRef getHelperName() const override { return HelperName; }
314 
315   static bool classof(const CGCapturedStmtInfo *Info) {
316     return CGOpenMPRegionInfo::classof(Info) &&
317            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
318   }
319 
320 private:
321   StringRef HelperName;
322 };
323 
324 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
325   llvm_unreachable("No codegen for expressions");
326 }
327 /// \brief API for generation of expressions captured in a innermost OpenMP
328 /// region.
329 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
330 public:
331   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
332       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
333                                   OMPD_unknown,
334                                   /*HasCancel=*/false),
335         PrivScope(CGF) {
336     // Make sure the globals captured in the provided statement are local by
337     // using the privatization logic. We assume the same variable is not
338     // captured more than once.
339     for (auto &C : CS.captures()) {
340       if (!C.capturesVariable() && !C.capturesVariableByCopy())
341         continue;
342 
343       const VarDecl *VD = C.getCapturedVar();
344       if (VD->isLocalVarDeclOrParm())
345         continue;
346 
347       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
348                       /*RefersToEnclosingVariableOrCapture=*/false,
349                       VD->getType().getNonReferenceType(), VK_LValue,
350                       SourceLocation());
351       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
352         return CGF.EmitLValue(&DRE).getAddress();
353       });
354     }
355     (void)PrivScope.Privatize();
356   }
357 
358   /// \brief Lookup the captured field decl for a variable.
359   const FieldDecl *lookup(const VarDecl *VD) const override {
360     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
361       return FD;
362     return nullptr;
363   }
364 
365   /// \brief Emit the captured statement body.
366   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
367     llvm_unreachable("No body for expressions");
368   }
369 
370   /// \brief Get a variable or parameter for storing global thread id
371   /// inside OpenMP construct.
372   const VarDecl *getThreadIDVariable() const override {
373     llvm_unreachable("No thread id for expressions");
374   }
375 
376   /// \brief Get the name of the capture helper.
377   StringRef getHelperName() const override {
378     llvm_unreachable("No helper name for expressions");
379   }
380 
381   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
382 
383 private:
384   /// Private scope to capture global variables.
385   CodeGenFunction::OMPPrivateScope PrivScope;
386 };
387 
388 /// \brief RAII for emitting code of OpenMP constructs.
389 class InlinedOpenMPRegionRAII {
390   CodeGenFunction &CGF;
391   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
392   FieldDecl *LambdaThisCaptureField = nullptr;
393 
394 public:
395   /// \brief Constructs region for combined constructs.
396   /// \param CodeGen Code generation sequence for combined directives. Includes
397   /// a list of functions used for code generation of implicitly inlined
398   /// regions.
399   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
400                           OpenMPDirectiveKind Kind, bool HasCancel)
401       : CGF(CGF) {
402     // Start emission for the construct.
403     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
404         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
405     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
406     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
407     CGF.LambdaThisCaptureField = nullptr;
408   }
409 
410   ~InlinedOpenMPRegionRAII() {
411     // Restore original CapturedStmtInfo only if we're done with code emission.
412     auto *OldCSI =
413         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
414     delete CGF.CapturedStmtInfo;
415     CGF.CapturedStmtInfo = OldCSI;
416     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
417     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
418   }
419 };
420 
421 /// \brief Values for bit flags used in the ident_t to describe the fields.
422 /// All enumeric elements are named and described in accordance with the code
423 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
424 enum OpenMPLocationFlags : unsigned {
425   /// \brief Use trampoline for internal microtask.
426   OMP_IDENT_IMD = 0x01,
427   /// \brief Use c-style ident structure.
428   OMP_IDENT_KMPC = 0x02,
429   /// \brief Atomic reduction option for kmpc_reduce.
430   OMP_ATOMIC_REDUCE = 0x10,
431   /// \brief Explicit 'barrier' directive.
432   OMP_IDENT_BARRIER_EXPL = 0x20,
433   /// \brief Implicit barrier in code.
434   OMP_IDENT_BARRIER_IMPL = 0x40,
435   /// \brief Implicit barrier in 'for' directive.
436   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
437   /// \brief Implicit barrier in 'sections' directive.
438   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
439   /// \brief Implicit barrier in 'single' directive.
440   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
441   /// Call of __kmp_for_static_init for static loop.
442   OMP_IDENT_WORK_LOOP = 0x200,
443   /// Call of __kmp_for_static_init for sections.
444   OMP_IDENT_WORK_SECTIONS = 0x400,
445   /// Call of __kmp_for_static_init for distribute.
446   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
447   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
448 };
449 
/// Field indices of the ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
490 
/// Schedule types for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Alias for OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
522 
/// Identifiers for runtime library functions. The comment on each enumerator
/// gives the prototype of the call it denotes.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  /// Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  /// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
686 
687 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
688 /// region.
689 class CleanupTy final : public EHScopeStack::Cleanup {
690   PrePostActionTy *Action;
691 
692 public:
693   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
694   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
695     if (!CGF.HaveInsertPoint())
696       return;
697     Action->Exit(CGF);
698   }
699 };
700 
701 } // anonymous namespace
702 
703 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
704   CodeGenFunction::RunCleanupsScope Scope(CGF);
705   if (PrePostAction) {
706     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
707     Callback(CodeGen, CGF, *PrePostAction);
708   } else {
709     PrePostActionTy Action;
710     Callback(CodeGen, CGF, Action);
711   }
712 }
713 
714 /// Check if the combiner is a call to UDR combiner and if it is so return the
715 /// UDR decl used for reduction.
716 static const OMPDeclareReductionDecl *
717 getReductionInit(const Expr *ReductionOp) {
718   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
719     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
720       if (auto *DRE =
721               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
722         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
723           return DRD;
724   return nullptr;
725 }
726 
727 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
728                                              const OMPDeclareReductionDecl *DRD,
729                                              const Expr *InitOp,
730                                              Address Private, Address Original,
731                                              QualType Ty) {
732   if (DRD->getInitializer()) {
733     std::pair<llvm::Function *, llvm::Function *> Reduction =
734         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
735     auto *CE = cast<CallExpr>(InitOp);
736     auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
737     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
738     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
739     auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
740     auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
741     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
742     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
743                             [=]() -> Address { return Private; });
744     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
745                             [=]() -> Address { return Original; });
746     (void)PrivateScope.Privatize();
747     RValue Func = RValue::get(Reduction.second);
748     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
749     CGF.EmitIgnoredExpr(InitOp);
750   } else {
751     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
752     auto *GV = new llvm::GlobalVariable(
753         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
754         llvm::GlobalValue::PrivateLinkage, Init, ".init");
755     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
756     RValue InitRVal;
757     switch (CGF.getEvaluationKind(Ty)) {
758     case TEK_Scalar:
759       InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
760       break;
761     case TEK_Complex:
762       InitRVal =
763           RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
764       break;
765     case TEK_Aggregate:
766       InitRVal = RValue::getAggregate(LV.getAddress());
767       break;
768     }
769     OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
770     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
771     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
772                          /*IsInitializer=*/false);
773   }
774 }
775 
776 /// \brief Emit initialization of arrays of complex types.
777 /// \param DestAddr Address of the array.
778 /// \param Type Type of array.
779 /// \param Init Initial expression of array.
780 /// \param SrcAddr Address of the original array.
781 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
782                                  QualType Type, const Expr *Init,
783                                  const OMPDeclareReductionDecl *DRD,
784                                  Address SrcAddr = Address::invalid()) {
785   // Perform element-by-element initialization.
786   QualType ElementTy;
787 
788   // Drill down to the base element type on both arrays.
789   auto ArrayTy = Type->getAsArrayTypeUnsafe();
790   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
791   DestAddr =
792       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
793   if (DRD)
794     SrcAddr =
795         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
796 
797   llvm::Value *SrcBegin = nullptr;
798   if (DRD)
799     SrcBegin = SrcAddr.getPointer();
800   auto DestBegin = DestAddr.getPointer();
801   // Cast from pointer to array type to pointer to single element.
802   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
803   // The basic structure here is a while-do loop.
804   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
805   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
806   auto IsEmpty =
807       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
808   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
809 
810   // Enter the loop body, making that address the current address.
811   auto EntryBB = CGF.Builder.GetInsertBlock();
812   CGF.EmitBlock(BodyBB);
813 
814   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
815 
816   llvm::PHINode *SrcElementPHI = nullptr;
817   Address SrcElementCurrent = Address::invalid();
818   if (DRD) {
819     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
820                                           "omp.arraycpy.srcElementPast");
821     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
822     SrcElementCurrent =
823         Address(SrcElementPHI,
824                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
825   }
826   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
827       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
828   DestElementPHI->addIncoming(DestBegin, EntryBB);
829   Address DestElementCurrent =
830       Address(DestElementPHI,
831               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
832 
833   // Emit copy.
834   {
835     CodeGenFunction::RunCleanupsScope InitScope(CGF);
836     if (DRD && (DRD->getInitializer() || !Init)) {
837       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
838                                        SrcElementCurrent, ElementTy);
839     } else
840       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
841                            /*IsInitializer=*/false);
842   }
843 
844   if (DRD) {
845     // Shift the address forward by one element.
846     auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
847         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
848     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
849   }
850 
851   // Shift the address forward by one element.
852   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
853       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
854   // Check whether we've reached the end.
855   auto Done =
856       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
857   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
858   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
859 
860   // Done.
861   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
862 }
863 
864 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
865   return CGF.EmitOMPSharedLValue(E);
866 }
867 
868 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
869                                             const Expr *E) {
870   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
871     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
872   return LValue();
873 }
874 
875 void ReductionCodeGen::emitAggregateInitialization(
876     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
877     const OMPDeclareReductionDecl *DRD) {
878   // Emit VarDecl with copy init for arrays.
879   // Get the address of the original variable captured in current
880   // captured region.
881   auto *PrivateVD =
882       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
884                        DRD ? ClausesData[N].ReductionOp : PrivateVD->getInit(),
885                        DRD, SharedLVal.getAddress());
886 }
887 
888 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
889                                    ArrayRef<const Expr *> Privates,
890                                    ArrayRef<const Expr *> ReductionOps) {
891   ClausesData.reserve(Shareds.size());
892   SharedAddresses.reserve(Shareds.size());
893   Sizes.reserve(Shareds.size());
894   BaseDecls.reserve(Shareds.size());
895   auto IPriv = Privates.begin();
896   auto IRed = ReductionOps.begin();
897   for (const auto *Ref : Shareds) {
898     ClausesData.emplace_back(Ref, *IPriv, *IRed);
899     std::advance(IPriv, 1);
900     std::advance(IRed, 1);
901   }
902 }
903 
904 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
905   assert(SharedAddresses.size() == N &&
906          "Number of generated lvalues must be exactly N.");
907   SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
908                                emitSharedLValueUB(CGF, ClausesData[N].Ref));
909 }
910 
911 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
912   auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
916   if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
917     Sizes.emplace_back(
918         CGF.getTypeSize(
919             SharedAddresses[N].first.getType().getNonReferenceType()),
920         nullptr);
921     return;
922   }
923   llvm::Value *Size;
924   llvm::Value *SizeInChars;
925   llvm::Type *ElemType =
926       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
927           ->getElementType();
928   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
929   if (AsArraySection) {
930     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
931                                      SharedAddresses[N].first.getPointer());
932     Size = CGF.Builder.CreateNUWAdd(
933         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
934     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
935   } else {
936     SizeInChars = CGF.getTypeSize(
937         SharedAddresses[N].first.getType().getNonReferenceType());
938     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
939   }
940   Sizes.emplace_back(SizeInChars, Size);
941   CodeGenFunction::OpaqueValueMapping OpaqueMap(
942       CGF,
943       cast<OpaqueValueExpr>(
944           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
945       RValue::get(Size));
946   CGF.EmitVariablyModifiedType(PrivateType);
947 }
948 
949 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
950                                          llvm::Value *Size) {
951   auto *PrivateVD =
952       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
953   QualType PrivateType = PrivateVD->getType();
954   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
955   if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
956     assert(!Size && !Sizes[N].second &&
957            "Size should be nullptr for non-variably modified redution "
958            "items.");
959     return;
960   }
961   CodeGenFunction::OpaqueValueMapping OpaqueMap(
962       CGF,
963       cast<OpaqueValueExpr>(
964           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
965       RValue::get(Size));
966   CGF.EmitVariablyModifiedType(PrivateType);
967 }
968 
969 void ReductionCodeGen::emitInitialization(
970     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
971     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
972   assert(SharedAddresses.size() > N && "No variable was generated");
973   auto *PrivateVD =
974       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
975   auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
976   QualType PrivateType = PrivateVD->getType();
977   PrivateAddr = CGF.Builder.CreateElementBitCast(
978       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
979   QualType SharedType = SharedAddresses[N].first.getType();
980   SharedLVal = CGF.MakeAddrLValue(
981       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
982                                        CGF.ConvertTypeForMem(SharedType)),
983       SharedType, SharedAddresses[N].first.getBaseInfo());
984   if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
985       CGF.getContext().getAsArrayType(PrivateVD->getType())) {
986     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
987   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
988     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
989                                      PrivateAddr, SharedLVal.getAddress(),
990                                      SharedLVal.getType());
991   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
992              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
993     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
994                          PrivateVD->getType().getQualifiers(),
995                          /*IsInitializer=*/false);
996   }
997 }
998 
999 bool ReductionCodeGen::needCleanups(unsigned N) {
1000   auto *PrivateVD =
1001       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1002   QualType PrivateType = PrivateVD->getType();
1003   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1004   return DTorKind != QualType::DK_none;
1005 }
1006 
1007 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1008                                     Address PrivateAddr) {
1009   auto *PrivateVD =
1010       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1011   QualType PrivateType = PrivateVD->getType();
1012   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1013   if (needCleanups(N)) {
1014     PrivateAddr = CGF.Builder.CreateElementBitCast(
1015         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1016     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1017   }
1018 }
1019 
1020 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1021                           LValue BaseLV) {
1022   BaseTy = BaseTy.getNonReferenceType();
1023   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1024          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1025     if (auto *PtrTy = BaseTy->getAs<PointerType>())
1026       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1027     else {
1028       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
1029                                              BaseTy->castAs<ReferenceType>());
1030     }
1031     BaseTy = BaseTy->getPointeeType();
1032   }
1033   return CGF.MakeAddrLValue(
1034       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1035                                        CGF.ConvertTypeForMem(ElTy)),
1036       BaseLV.getType(), BaseLV.getBaseInfo());
1037 }
1038 
1039 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1040                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1041                           llvm::Value *Addr) {
1042   Address Tmp = Address::invalid();
1043   Address TopTmp = Address::invalid();
1044   Address MostTopTmp = Address::invalid();
1045   BaseTy = BaseTy.getNonReferenceType();
1046   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1047          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1048     Tmp = CGF.CreateMemTemp(BaseTy);
1049     if (TopTmp.isValid())
1050       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1051     else
1052       MostTopTmp = Tmp;
1053     TopTmp = Tmp;
1054     BaseTy = BaseTy->getPointeeType();
1055   }
1056   llvm::Type *Ty = BaseLVType;
1057   if (Tmp.isValid())
1058     Ty = Tmp.getElementType();
1059   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1060   if (Tmp.isValid()) {
1061     CGF.Builder.CreateStore(Addr, Tmp);
1062     return MostTopTmp;
1063   }
1064   return Address(Addr, BaseLVAlignment);
1065 }
1066 
1067 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1068                                                Address PrivateAddr) {
1069   const DeclRefExpr *DE;
1070   const VarDecl *OrigVD = nullptr;
1071   if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
1072     auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1073     while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1074       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1075     while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1076       Base = TempASE->getBase()->IgnoreParenImpCasts();
1077     DE = cast<DeclRefExpr>(Base);
1078     OrigVD = cast<VarDecl>(DE->getDecl());
1079   } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
1080     auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1081     while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1082       Base = TempASE->getBase()->IgnoreParenImpCasts();
1083     DE = cast<DeclRefExpr>(Base);
1084     OrigVD = cast<VarDecl>(DE->getDecl());
1085   }
1086   if (OrigVD) {
1087     BaseDecls.emplace_back(OrigVD);
1088     auto OriginalBaseLValue = CGF.EmitLValue(DE);
1089     LValue BaseLValue =
1090         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1091                     OriginalBaseLValue);
1092     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1093         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1094     llvm::Value *Ptr =
1095         CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
1096     return castToBase(CGF, OrigVD->getType(),
1097                       SharedAddresses[N].first.getType(),
1098                       OriginalBaseLValue.getPointer()->getType(),
1099                       OriginalBaseLValue.getAlignment(), Ptr);
1100   }
1101   BaseDecls.emplace_back(
1102       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1103   return PrivateAddr;
1104 }
1105 
1106 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1107   auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1108   return DRD && DRD->getInitializer();
1109 }
1110 
1111 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1112   return CGF.EmitLoadOfPointerLValue(
1113       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1114       getThreadIDVariable()->getType()->castAs<PointerType>());
1115 }
1116 
1117 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1118   if (!CGF.HaveInsertPoint())
1119     return;
1120   // 1.2.2 OpenMP Language Terminology
1121   // Structured block - An executable statement with a single entry at the
1122   // top and a single exit at the bottom.
1123   // The point of exit cannot be a branch out of the structured block.
1124   // longjmp() and throw() must not violate the entry/exit criteria.
1125   CGF.EHStack.pushTerminate();
1126   CodeGen(CGF);
1127   CGF.EHStack.popTerminate();
1128 }
1129 
1130 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1131     CodeGenFunction &CGF) {
1132   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1133                             getThreadIDVariable()->getType(),
1134                             LValueBaseInfo(AlignmentSource::Decl, false));
1135 }
1136 
1137 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1138     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
1139   IdentTy = llvm::StructType::create(
1140       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
1141       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
1142       CGM.Int8PtrTy /* psource */);
1143   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1144 
1145   loadOffloadInfoMetadata();
1146 }
1147 
1148 void CGOpenMPRuntime::clear() {
1149   InternalVars.clear();
1150 }
1151 
1152 static llvm::Function *
1153 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1154                           const Expr *CombinerInitializer, const VarDecl *In,
1155                           const VarDecl *Out, bool IsCombiner) {
1156   // void .omp_combiner.(Ty *in, Ty *out);
1157   auto &C = CGM.getContext();
1158   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1159   FunctionArgList Args;
1160   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1161                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1162   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1163                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1164   Args.push_back(&OmpOutParm);
1165   Args.push_back(&OmpInParm);
1166   auto &FnInfo =
1167       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1168   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1169   auto *Fn = llvm::Function::Create(
1170       FnTy, llvm::GlobalValue::InternalLinkage,
1171       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
1172   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
1173   Fn->removeFnAttr(llvm::Attribute::NoInline);
1174   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1175   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1176   CodeGenFunction CGF(CGM);
1177   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1178   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1179   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
1180   CodeGenFunction::OMPPrivateScope Scope(CGF);
1181   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1182   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
1183     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1184         .getAddress();
1185   });
1186   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1187   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
1188     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1189         .getAddress();
1190   });
1191   (void)Scope.Privatize();
1192   if (!IsCombiner && Out->hasInit() &&
1193       !CGF.isTrivialInitializer(Out->getInit())) {
1194     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1195                          Out->getType().getQualifiers(),
1196                          /*IsInitializer=*/true);
1197   }
1198   if (CombinerInitializer)
1199     CGF.EmitIgnoredExpr(CombinerInitializer);
1200   Scope.ForceCleanup();
1201   CGF.FinishFunction();
1202   return Fn;
1203 }
1204 
1205 void CGOpenMPRuntime::emitUserDefinedReduction(
1206     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1207   if (UDRMap.count(D) > 0)
1208     return;
1209   auto &C = CGM.getContext();
1210   if (!In || !Out) {
1211     In = &C.Idents.get("omp_in");
1212     Out = &C.Idents.get("omp_out");
1213   }
1214   llvm::Function *Combiner = emitCombinerOrInitializer(
1215       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1216       cast<VarDecl>(D->lookup(Out).front()),
1217       /*IsCombiner=*/true);
1218   llvm::Function *Initializer = nullptr;
1219   if (auto *Init = D->getInitializer()) {
1220     if (!Priv || !Orig) {
1221       Priv = &C.Idents.get("omp_priv");
1222       Orig = &C.Idents.get("omp_orig");
1223     }
1224     Initializer = emitCombinerOrInitializer(
1225         CGM, D->getType(),
1226         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1227                                                                      : nullptr,
1228         cast<VarDecl>(D->lookup(Orig).front()),
1229         cast<VarDecl>(D->lookup(Priv).front()),
1230         /*IsCombiner=*/false);
1231   }
1232   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
1233   if (CGF) {
1234     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1235     Decls.second.push_back(D);
1236   }
1237 }
1238 
1239 std::pair<llvm::Function *, llvm::Function *>
1240 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1241   auto I = UDRMap.find(D);
1242   if (I != UDRMap.end())
1243     return I->second;
1244   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1245   return UDRMap.lookup(D);
1246 }
1247 
1248 // Layout information for ident_t.
1249 static CharUnits getIdentAlign(CodeGenModule &CGM) {
1250   return CGM.getPointerAlign();
1251 }
1252 static CharUnits getIdentSize(CodeGenModule &CGM) {
1253   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
1254   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
1255 }
1256 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
1257   // All the fields except the last are i32, so this works beautifully.
1258   return unsigned(Field) * CharUnits::fromQuantity(4);
1259 }
1260 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
1261                                    IdentFieldIndex Field,
1262                                    const llvm::Twine &Name = "") {
1263   auto Offset = getOffsetOfIdentField(Field);
1264   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
1265 }
1266 
1267 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
1268     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1269     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1270     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1271   assert(ThreadIDVar->getType()->isPointerType() &&
1272          "thread id variable must be of type kmp_int32 *");
1273   CodeGenFunction CGF(CGM, true);
1274   bool HasCancel = false;
1275   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1276     HasCancel = OPD->hasCancel();
1277   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1278     HasCancel = OPSD->hasCancel();
1279   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1280     HasCancel = OPFD->hasCancel();
1281   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1282                                     HasCancel, OutlinedHelperName);
1283   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1284   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1285 }
1286 
1287 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1288     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1289     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1290   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1291   return emitParallelOrTeamsOutlinedFunction(
1292       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1293 }
1294 
1295 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1296     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1297     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1298   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1299   return emitParallelOrTeamsOutlinedFunction(
1300       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1301 }
1302 
1303 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
1304     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1305     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1306     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1307     bool Tied, unsigned &NumberOfParts) {
1308   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1309                                               PrePostActionTy &) {
1310     auto *ThreadID = getThreadID(CGF, D.getLocStart());
1311     auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1312     llvm::Value *TaskArgs[] = {
1313         UpLoc, ThreadID,
1314         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1315                                     TaskTVar->getType()->castAs<PointerType>())
1316             .getPointer()};
1317     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1318   };
1319   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1320                                                             UntiedCodeGen);
1321   CodeGen.setAction(Action);
1322   assert(!ThreadIDVar->getType()->isPointerType() &&
1323          "thread id variable must be of type kmp_int32 for tasks");
1324   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
1325   auto *TD = dyn_cast<OMPTaskDirective>(&D);
1326   CodeGenFunction CGF(CGM, true);
1327   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1328                                         InnermostKind,
1329                                         TD ? TD->hasCancel() : false, Action);
1330   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1331   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
1332   if (!Tied)
1333     NumberOfParts = Action.getNumberOfParts();
1334   return Res;
1335 }
1336 
1337 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1338   CharUnits Align = getIdentAlign(CGM);
1339   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1340   if (!Entry) {
1341     if (!DefaultOpenMPPSource) {
1342       // Initialize default location for psource field of ident_t structure of
1343       // all ident_t objects. Format is ";file;function;line;column;;".
1344       // Taken from
1345       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1346       DefaultOpenMPPSource =
1347           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1348       DefaultOpenMPPSource =
1349           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1350     }
1351 
1352     ConstantInitBuilder builder(CGM);
1353     auto fields = builder.beginStruct(IdentTy);
1354     fields.addInt(CGM.Int32Ty, 0);
1355     fields.addInt(CGM.Int32Ty, Flags);
1356     fields.addInt(CGM.Int32Ty, 0);
1357     fields.addInt(CGM.Int32Ty, 0);
1358     fields.add(DefaultOpenMPPSource);
1359     auto DefaultOpenMPLocation =
1360       fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
1361                                    llvm::GlobalValue::PrivateLinkage);
1362     DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
1363 
1364     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1365   }
1366   return Address(Entry, Align);
1367 }
1368 
1369 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1370                                                  SourceLocation Loc,
1371                                                  unsigned Flags) {
1372   Flags |= OMP_IDENT_KMPC;
1373   // If no debug info is generated - return global default location.
1374   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1375       Loc.isInvalid())
1376     return getOrCreateDefaultLocation(Flags).getPointer();
1377 
1378   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1379 
1380   Address LocValue = Address::invalid();
1381   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1382   if (I != OpenMPLocThreadIDMap.end())
1383     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
1384 
1385   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1386   // GetOpenMPThreadID was called before this routine.
1387   if (!LocValue.isValid()) {
1388     // Generate "ident_t .kmpc_loc.addr;"
1389     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
1390                                       ".kmpc_loc.addr");
1391     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1392     Elem.second.DebugLoc = AI.getPointer();
1393     LocValue = AI;
1394 
1395     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1396     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1397     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1398                              CGM.getSize(getIdentSize(CGF.CGM)));
1399   }
1400 
1401   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1402   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
1403 
1404   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1405   if (OMPDebugLoc == nullptr) {
1406     SmallString<128> Buffer2;
1407     llvm::raw_svector_ostream OS2(Buffer2);
1408     // Build debug location
1409     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1410     OS2 << ";" << PLoc.getFilename() << ";";
1411     if (const FunctionDecl *FD =
1412             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
1413       OS2 << FD->getQualifiedNameAsString();
1414     }
1415     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1416     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1417     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1418   }
1419   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1420   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
1421 
1422   // Our callers always pass this to a runtime function, so for
1423   // convenience, go ahead and return a naked pointer.
1424   return LocValue.getPointer();
1425 }
1426 
1427 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1428                                           SourceLocation Loc) {
1429   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1430 
1431   llvm::Value *ThreadID = nullptr;
1432   // Check whether we've already cached a load of the thread id in this
1433   // function.
1434   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1435   if (I != OpenMPLocThreadIDMap.end()) {
1436     ThreadID = I->second.ThreadID;
1437     if (ThreadID != nullptr)
1438       return ThreadID;
1439   }
1440   // If exceptions are enabled, do not use parameter to avoid possible crash.
1441   if (!CGF.getInvokeDest()) {
1442     if (auto *OMPRegionInfo =
1443             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1444       if (OMPRegionInfo->getThreadIDVariable()) {
1445         // Check if this an outlined function with thread id passed as argument.
1446         auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1447         ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1448         // If value loaded in entry block, cache it and use it everywhere in
1449         // function.
1450         if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1451           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1452           Elem.second.ThreadID = ThreadID;
1453         }
1454         return ThreadID;
1455       }
1456     }
1457   }
1458 
1459   // This is not an outlined function region - need to call __kmpc_int32
1460   // kmpc_global_thread_num(ident_t *loc).
1461   // Generate thread id value and cache this value for use across the
1462   // function.
1463   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1464   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1465   ThreadID =
1466       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1467                           emitUpdateLocation(CGF, Loc));
1468   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1469   Elem.second.ThreadID = ThreadID;
1470   return ThreadID;
1471 }
1472 
1473 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1474   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1475   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1476     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1477   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1478     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1479       UDRMap.erase(D);
1480     }
1481     FunctionUDRMap.erase(CGF.CurFn);
1482   }
1483 }
1484 
1485 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1486   if (!IdentTy) {
1487   }
1488   return llvm::PointerType::getUnqual(IdentTy);
1489 }
1490 
1491 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1492   if (!Kmpc_MicroTy) {
1493     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1494     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1495                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1496     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1497   }
1498   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1499 }
1500 
1501 llvm::Constant *
1502 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1503   llvm::Constant *RTLFn = nullptr;
1504   switch (static_cast<OpenMPRTLFunction>(Function)) {
1505   case OMPRTL__kmpc_fork_call: {
1506     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1507     // microtask, ...);
1508     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1509                                 getKmpc_MicroPointerTy()};
1510     llvm::FunctionType *FnTy =
1511         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1512     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1513     break;
1514   }
1515   case OMPRTL__kmpc_global_thread_num: {
1516     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1517     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1518     llvm::FunctionType *FnTy =
1519         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1520     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1521     break;
1522   }
1523   case OMPRTL__kmpc_threadprivate_cached: {
1524     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1525     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1526     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1527                                 CGM.VoidPtrTy, CGM.SizeTy,
1528                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1529     llvm::FunctionType *FnTy =
1530         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1531     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1532     break;
1533   }
1534   case OMPRTL__kmpc_critical: {
1535     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1536     // kmp_critical_name *crit);
1537     llvm::Type *TypeParams[] = {
1538         getIdentTyPointerTy(), CGM.Int32Ty,
1539         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1540     llvm::FunctionType *FnTy =
1541         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1542     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1543     break;
1544   }
1545   case OMPRTL__kmpc_critical_with_hint: {
1546     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1547     // kmp_critical_name *crit, uintptr_t hint);
1548     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1549                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1550                                 CGM.IntPtrTy};
1551     llvm::FunctionType *FnTy =
1552         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1553     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1554     break;
1555   }
1556   case OMPRTL__kmpc_threadprivate_register: {
1557     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1558     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1559     // typedef void *(*kmpc_ctor)(void *);
1560     auto KmpcCtorTy =
1561         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1562                                 /*isVarArg*/ false)->getPointerTo();
1563     // typedef void *(*kmpc_cctor)(void *, void *);
1564     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1565     auto KmpcCopyCtorTy =
1566         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1567                                 /*isVarArg*/ false)->getPointerTo();
1568     // typedef void (*kmpc_dtor)(void *);
1569     auto KmpcDtorTy =
1570         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1571             ->getPointerTo();
1572     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1573                               KmpcCopyCtorTy, KmpcDtorTy};
1574     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1575                                         /*isVarArg*/ false);
1576     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1577     break;
1578   }
1579   case OMPRTL__kmpc_end_critical: {
1580     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1581     // kmp_critical_name *crit);
1582     llvm::Type *TypeParams[] = {
1583         getIdentTyPointerTy(), CGM.Int32Ty,
1584         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1585     llvm::FunctionType *FnTy =
1586         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1587     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1588     break;
1589   }
1590   case OMPRTL__kmpc_cancel_barrier: {
1591     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1592     // global_tid);
1593     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1594     llvm::FunctionType *FnTy =
1595         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1596     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1597     break;
1598   }
1599   case OMPRTL__kmpc_barrier: {
1600     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1601     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1602     llvm::FunctionType *FnTy =
1603         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1604     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1605     break;
1606   }
1607   case OMPRTL__kmpc_for_static_fini: {
1608     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1609     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1610     llvm::FunctionType *FnTy =
1611         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1612     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1613     break;
1614   }
1615   case OMPRTL__kmpc_push_num_threads: {
1616     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1617     // kmp_int32 num_threads)
1618     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1619                                 CGM.Int32Ty};
1620     llvm::FunctionType *FnTy =
1621         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1622     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1623     break;
1624   }
1625   case OMPRTL__kmpc_serialized_parallel: {
1626     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1627     // global_tid);
1628     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1629     llvm::FunctionType *FnTy =
1630         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1631     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1632     break;
1633   }
1634   case OMPRTL__kmpc_end_serialized_parallel: {
1635     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1636     // global_tid);
1637     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1638     llvm::FunctionType *FnTy =
1639         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1640     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1641     break;
1642   }
1643   case OMPRTL__kmpc_flush: {
1644     // Build void __kmpc_flush(ident_t *loc);
1645     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1646     llvm::FunctionType *FnTy =
1647         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1648     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1649     break;
1650   }
1651   case OMPRTL__kmpc_master: {
1652     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1653     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1654     llvm::FunctionType *FnTy =
1655         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1656     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1657     break;
1658   }
1659   case OMPRTL__kmpc_end_master: {
1660     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1661     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1662     llvm::FunctionType *FnTy =
1663         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1664     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1665     break;
1666   }
1667   case OMPRTL__kmpc_omp_taskyield: {
1668     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1669     // int end_part);
1670     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1671     llvm::FunctionType *FnTy =
1672         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1673     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1674     break;
1675   }
1676   case OMPRTL__kmpc_single: {
1677     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1678     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1679     llvm::FunctionType *FnTy =
1680         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1681     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1682     break;
1683   }
1684   case OMPRTL__kmpc_end_single: {
1685     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1686     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1687     llvm::FunctionType *FnTy =
1688         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1689     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1690     break;
1691   }
1692   case OMPRTL__kmpc_omp_task_alloc: {
1693     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1694     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1695     // kmp_routine_entry_t *task_entry);
1696     assert(KmpRoutineEntryPtrTy != nullptr &&
1697            "Type kmp_routine_entry_t must be created.");
1698     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1699                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1700     // Return void * and then cast to particular kmp_task_t type.
1701     llvm::FunctionType *FnTy =
1702         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1703     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1704     break;
1705   }
1706   case OMPRTL__kmpc_omp_task: {
1707     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1708     // *new_task);
1709     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1710                                 CGM.VoidPtrTy};
1711     llvm::FunctionType *FnTy =
1712         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1713     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1714     break;
1715   }
1716   case OMPRTL__kmpc_copyprivate: {
1717     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1718     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1719     // kmp_int32 didit);
1720     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1721     auto *CpyFnTy =
1722         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1723     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1724                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1725                                 CGM.Int32Ty};
1726     llvm::FunctionType *FnTy =
1727         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1728     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1729     break;
1730   }
1731   case OMPRTL__kmpc_reduce: {
1732     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1733     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1734     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1735     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1736     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1737                                                /*isVarArg=*/false);
1738     llvm::Type *TypeParams[] = {
1739         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1740         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1741         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1742     llvm::FunctionType *FnTy =
1743         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1744     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1745     break;
1746   }
1747   case OMPRTL__kmpc_reduce_nowait: {
1748     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1749     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1750     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1751     // *lck);
1752     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1753     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1754                                                /*isVarArg=*/false);
1755     llvm::Type *TypeParams[] = {
1756         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1757         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1758         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1759     llvm::FunctionType *FnTy =
1760         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1762     break;
1763   }
1764   case OMPRTL__kmpc_end_reduce: {
1765     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *lck);
1767     llvm::Type *TypeParams[] = {
1768         getIdentTyPointerTy(), CGM.Int32Ty,
1769         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770     llvm::FunctionType *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1773     break;
1774   }
1775   case OMPRTL__kmpc_end_reduce_nowait: {
1776     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1777     // kmp_critical_name *lck);
1778     llvm::Type *TypeParams[] = {
1779         getIdentTyPointerTy(), CGM.Int32Ty,
1780         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1781     llvm::FunctionType *FnTy =
1782         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1783     RTLFn =
1784         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1785     break;
1786   }
1787   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1790     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1791                                 CGM.VoidPtrTy};
1792     llvm::FunctionType *FnTy =
1793         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1794     RTLFn =
1795         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1796     break;
1797   }
1798   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1801     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1802                                 CGM.VoidPtrTy};
1803     llvm::FunctionType *FnTy =
1804         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1805     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1806                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1807     break;
1808   }
1809   case OMPRTL__kmpc_ordered: {
1810     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1811     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1812     llvm::FunctionType *FnTy =
1813         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1814     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1815     break;
1816   }
1817   case OMPRTL__kmpc_end_ordered: {
1818     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1819     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1820     llvm::FunctionType *FnTy =
1821         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1822     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1823     break;
1824   }
1825   case OMPRTL__kmpc_omp_taskwait: {
1826     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1827     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1828     llvm::FunctionType *FnTy =
1829         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1830     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1831     break;
1832   }
1833   case OMPRTL__kmpc_taskgroup: {
1834     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1835     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1836     llvm::FunctionType *FnTy =
1837         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1838     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1839     break;
1840   }
1841   case OMPRTL__kmpc_end_taskgroup: {
1842     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1843     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1844     llvm::FunctionType *FnTy =
1845         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1846     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1847     break;
1848   }
1849   case OMPRTL__kmpc_push_proc_bind: {
1850     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1851     // int proc_bind)
1852     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1853     llvm::FunctionType *FnTy =
1854         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1855     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1856     break;
1857   }
1858   case OMPRTL__kmpc_omp_task_with_deps: {
1859     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1860     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1861     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1862     llvm::Type *TypeParams[] = {
1863         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1864         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1865     llvm::FunctionType *FnTy =
1866         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1867     RTLFn =
1868         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1869     break;
1870   }
1871   case OMPRTL__kmpc_omp_wait_deps: {
1872     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1873     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1874     // kmp_depend_info_t *noalias_dep_list);
1875     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1876                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1877                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1878     llvm::FunctionType *FnTy =
1879         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1880     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1881     break;
1882   }
1883   case OMPRTL__kmpc_cancellationpoint: {
1884     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1885     // global_tid, kmp_int32 cncl_kind)
1886     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1887     llvm::FunctionType *FnTy =
1888         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1889     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1890     break;
1891   }
1892   case OMPRTL__kmpc_cancel: {
1893     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1894     // kmp_int32 cncl_kind)
1895     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1896     llvm::FunctionType *FnTy =
1897         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1898     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1899     break;
1900   }
1901   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
1904     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1905         CGM.Int32Ty};
1906     llvm::FunctionType *FnTy =
1907         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1908     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1909     break;
1910   }
1911   case OMPRTL__kmpc_fork_teams: {
1912     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1913     // microtask, ...);
1914     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1915                                 getKmpc_MicroPointerTy()};
1916     llvm::FunctionType *FnTy =
1917         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1918     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1919     break;
1920   }
1921   case OMPRTL__kmpc_taskloop: {
1922     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1923     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1924     // sched, kmp_uint64 grainsize, void *task_dup);
1925     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1926                                 CGM.IntTy,
1927                                 CGM.VoidPtrTy,
1928                                 CGM.IntTy,
1929                                 CGM.Int64Ty->getPointerTo(),
1930                                 CGM.Int64Ty->getPointerTo(),
1931                                 CGM.Int64Ty,
1932                                 CGM.IntTy,
1933                                 CGM.IntTy,
1934                                 CGM.Int64Ty,
1935                                 CGM.VoidPtrTy};
1936     llvm::FunctionType *FnTy =
1937         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1938     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1939     break;
1940   }
1941   case OMPRTL__kmpc_doacross_init: {
1942     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1943     // num_dims, struct kmp_dim *dims);
1944     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1945                                 CGM.Int32Ty,
1946                                 CGM.Int32Ty,
1947                                 CGM.VoidPtrTy};
1948     llvm::FunctionType *FnTy =
1949         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1950     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1951     break;
1952   }
1953   case OMPRTL__kmpc_doacross_fini: {
1954     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1955     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1956     llvm::FunctionType *FnTy =
1957         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1958     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1959     break;
1960   }
1961   case OMPRTL__kmpc_doacross_post: {
1962     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1963     // *vec);
1964     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1965                                 CGM.Int64Ty->getPointerTo()};
1966     llvm::FunctionType *FnTy =
1967         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1968     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1969     break;
1970   }
1971   case OMPRTL__kmpc_doacross_wait: {
1972     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1973     // *vec);
1974     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1975                                 CGM.Int64Ty->getPointerTo()};
1976     llvm::FunctionType *FnTy =
1977         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1978     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1979     break;
1980   }
1981   case OMPRTL__kmpc_task_reduction_init: {
1982     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
1983     // *data);
1984     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
1985     llvm::FunctionType *FnTy =
1986         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1987     RTLFn =
1988         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
1989     break;
1990   }
1991   case OMPRTL__kmpc_task_reduction_get_th_data: {
1992     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
1993     // *d);
1994     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
1995     llvm::FunctionType *FnTy =
1996         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1997     RTLFn = CGM.CreateRuntimeFunction(
1998         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
1999     break;
2000   }
2001   case OMPRTL__tgt_target: {
2002     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
2003     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
2004     // *arg_types);
2005     llvm::Type *TypeParams[] = {CGM.Int32Ty,
2006                                 CGM.VoidPtrTy,
2007                                 CGM.Int32Ty,
2008                                 CGM.VoidPtrPtrTy,
2009                                 CGM.VoidPtrPtrTy,
2010                                 CGM.SizeTy->getPointerTo(),
2011                                 CGM.Int32Ty->getPointerTo()};
2012     llvm::FunctionType *FnTy =
2013         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2014     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2015     break;
2016   }
2017   case OMPRTL__tgt_target_teams: {
2018     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
2019     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2020     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
2021     llvm::Type *TypeParams[] = {CGM.Int32Ty,
2022                                 CGM.VoidPtrTy,
2023                                 CGM.Int32Ty,
2024                                 CGM.VoidPtrPtrTy,
2025                                 CGM.VoidPtrPtrTy,
2026                                 CGM.SizeTy->getPointerTo(),
2027                                 CGM.Int32Ty->getPointerTo(),
2028                                 CGM.Int32Ty,
2029                                 CGM.Int32Ty};
2030     llvm::FunctionType *FnTy =
2031         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2032     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2033     break;
2034   }
2035   case OMPRTL__tgt_register_lib: {
2036     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2037     QualType ParamTy =
2038         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2039     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2040     llvm::FunctionType *FnTy =
2041         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2042     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2043     break;
2044   }
2045   case OMPRTL__tgt_unregister_lib: {
2046     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2047     QualType ParamTy =
2048         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2049     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2050     llvm::FunctionType *FnTy =
2051         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2052     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2053     break;
2054   }
2055   case OMPRTL__tgt_target_data_begin: {
2056     // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
2057     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2058     llvm::Type *TypeParams[] = {CGM.Int32Ty,
2059                                 CGM.Int32Ty,
2060                                 CGM.VoidPtrPtrTy,
2061                                 CGM.VoidPtrPtrTy,
2062                                 CGM.SizeTy->getPointerTo(),
2063                                 CGM.Int32Ty->getPointerTo()};
2064     llvm::FunctionType *FnTy =
2065         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2066     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2067     break;
2068   }
2069   case OMPRTL__tgt_target_data_end: {
2070     // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
2071     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2072     llvm::Type *TypeParams[] = {CGM.Int32Ty,
2073                                 CGM.Int32Ty,
2074                                 CGM.VoidPtrPtrTy,
2075                                 CGM.VoidPtrPtrTy,
2076                                 CGM.SizeTy->getPointerTo(),
2077                                 CGM.Int32Ty->getPointerTo()};
2078     llvm::FunctionType *FnTy =
2079         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2080     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2081     break;
2082   }
2083   case OMPRTL__tgt_target_data_update: {
2084     // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
2085     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2086     llvm::Type *TypeParams[] = {CGM.Int32Ty,
2087                                 CGM.Int32Ty,
2088                                 CGM.VoidPtrPtrTy,
2089                                 CGM.VoidPtrPtrTy,
2090                                 CGM.SizeTy->getPointerTo(),
2091                                 CGM.Int32Ty->getPointerTo()};
2092     llvm::FunctionType *FnTy =
2093         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2094     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2095     break;
2096   }
2097   }
2098   assert(RTLFn && "Unable to find OpenMP runtime function");
2099   return RTLFn;
2100 }
2101 
2102 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2103                                                              bool IVSigned) {
2104   assert((IVSize == 32 || IVSize == 64) &&
2105          "IV size is not compatible with the omp runtime");
2106   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2107                                        : "__kmpc_for_static_init_4u")
2108                            : (IVSigned ? "__kmpc_for_static_init_8"
2109                                        : "__kmpc_for_static_init_8u");
2110   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2111   auto PtrTy = llvm::PointerType::getUnqual(ITy);
2112   llvm::Type *TypeParams[] = {
2113     getIdentTyPointerTy(),                     // loc
2114     CGM.Int32Ty,                               // tid
2115     CGM.Int32Ty,                               // schedtype
2116     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2117     PtrTy,                                     // p_lower
2118     PtrTy,                                     // p_upper
2119     PtrTy,                                     // p_stride
2120     ITy,                                       // incr
2121     ITy                                        // chunk
2122   };
2123   llvm::FunctionType *FnTy =
2124       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2125   return CGM.CreateRuntimeFunction(FnTy, Name);
2126 }
2127 
2128 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2129                                                             bool IVSigned) {
2130   assert((IVSize == 32 || IVSize == 64) &&
2131          "IV size is not compatible with the omp runtime");
2132   auto Name =
2133       IVSize == 32
2134           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2135           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2136   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2137   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2138                                CGM.Int32Ty,           // tid
2139                                CGM.Int32Ty,           // schedtype
2140                                ITy,                   // lower
2141                                ITy,                   // upper
2142                                ITy,                   // stride
2143                                ITy                    // chunk
2144   };
2145   llvm::FunctionType *FnTy =
2146       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2147   return CGM.CreateRuntimeFunction(FnTy, Name);
2148 }
2149 
2150 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2151                                                             bool IVSigned) {
2152   assert((IVSize == 32 || IVSize == 64) &&
2153          "IV size is not compatible with the omp runtime");
2154   auto Name =
2155       IVSize == 32
2156           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2157           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2158   llvm::Type *TypeParams[] = {
2159       getIdentTyPointerTy(), // loc
2160       CGM.Int32Ty,           // tid
2161   };
2162   llvm::FunctionType *FnTy =
2163       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2164   return CGM.CreateRuntimeFunction(FnTy, Name);
2165 }
2166 
2167 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2168                                                             bool IVSigned) {
2169   assert((IVSize == 32 || IVSize == 64) &&
2170          "IV size is not compatible with the omp runtime");
2171   auto Name =
2172       IVSize == 32
2173           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2174           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2175   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2176   auto PtrTy = llvm::PointerType::getUnqual(ITy);
2177   llvm::Type *TypeParams[] = {
2178     getIdentTyPointerTy(),                     // loc
2179     CGM.Int32Ty,                               // tid
2180     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2181     PtrTy,                                     // p_lower
2182     PtrTy,                                     // p_upper
2183     PtrTy                                      // p_stride
2184   };
2185   llvm::FunctionType *FnTy =
2186       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2187   return CGM.CreateRuntimeFunction(FnTy, Name);
2188 }
2189 
2190 llvm::Constant *
2191 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2192   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2193          !CGM.getContext().getTargetInfo().isTLSSupported());
2194   // Lookup the entry, lazily creating it if necessary.
2195   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
2196                                      Twine(CGM.getMangledName(VD)) + ".cache.");
2197 }
2198 
2199 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2200                                                 const VarDecl *VD,
2201                                                 Address VDAddr,
2202                                                 SourceLocation Loc) {
2203   if (CGM.getLangOpts().OpenMPUseTLS &&
2204       CGM.getContext().getTargetInfo().isTLSSupported())
2205     return VDAddr;
2206 
2207   auto VarTy = VDAddr.getElementType();
2208   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2209                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2210                                                        CGM.Int8PtrTy),
2211                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2212                          getOrCreateThreadPrivateCache(VD)};
2213   return Address(CGF.EmitRuntimeCall(
2214       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2215                  VDAddr.getAlignment());
2216 }
2217 
2218 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2219     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2220     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2221   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2222   // library.
2223   auto OMPLoc = emitUpdateLocation(CGF, Loc);
2224   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2225                       OMPLoc);
2226   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2227   // to register constructor/destructor for variable.
2228   llvm::Value *Args[] = {OMPLoc,
2229                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2230                                                        CGM.VoidPtrTy),
2231                          Ctor, CopyCtor, Dtor};
2232   CGF.EmitRuntimeCall(
2233       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2234 }
2235 
/// Emits the helper functions the OpenMP runtime needs to construct and
/// destroy per-thread copies of the threadprivate variable \a VD, and the code
/// that registers them through emitThreadPrivateVarInit().
///
/// Nothing is registered (and nullptr is returned) when TLS is used for
/// threadprivate variables, when \a VD has no definition, when this
/// definition was already handled, or when neither a constructor nor a
/// destructor helper is required.
///
/// \param VD Threadprivate variable; replaced by its definition, if any.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated helpers and runtime calls.
/// \param PerformInit If true (and compiling C++), a helper that re-emits the
/// variable's initializer is generated.
/// \param CGF If non-null, the registration code is emitted into this
/// function; otherwise it is emitted into a newly created init function.
/// \returns The newly created init function when \a CGF is null and
/// registration code was emitted, nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS-based threadprivates no runtime registration is emitted.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    // Remember the definition so that it is processed only once.
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit implies that an initializer exists - confirm.
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // Shape of the helper: void *ctor(void *dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // The destination is given the alignment of the original variable and
      // is viewed as a pointer to the variable's memory type.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the destination pointer and make it the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // Shape of the helper: void dtor(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Destroy the object the argument points to, using the destroyer that
      // matches the variable's destruction kind.
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration code in a
      // standalone nullary init function and return it to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2350 
2351 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2352                                                           QualType VarType,
2353                                                           StringRef Name) {
2354   llvm::Twine VarName(Name, ".artificial.");
2355   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2356   llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
2357   llvm::Value *Args[] = {
2358       emitUpdateLocation(CGF, SourceLocation()),
2359       getThreadID(CGF, SourceLocation()),
2360       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2361       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2362                                 /*IsSigned=*/false),
2363       getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2364   return Address(
2365       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2366           CGF.EmitRuntimeCall(
2367               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2368           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2369       CGM.getPointerAlign());
2370 }
2371 
2372 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
2373 /// function. Here is the logic:
2374 /// if (Cond) {
2375 ///   ThenGen();
2376 /// } else {
2377 ///   ElseGen();
2378 /// }
2379 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2380                                       const RegionCodeGenTy &ThenGen,
2381                                       const RegionCodeGenTy &ElseGen) {
2382   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2383 
2384   // If the condition constant folds and can be elided, try to avoid emitting
2385   // the condition and the dead arm of the if/else.
2386   bool CondConstant;
2387   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2388     if (CondConstant)
2389       ThenGen(CGF);
2390     else
2391       ElseGen(CGF);
2392     return;
2393   }
2394 
2395   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2396   // emit the conditional branch.
2397   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
2398   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
2399   auto ContBlock = CGF.createBasicBlock("omp_if.end");
2400   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2401 
2402   // Emit the 'then' code.
2403   CGF.EmitBlock(ThenBlock);
2404   ThenGen(CGF);
2405   CGF.EmitBranch(ContBlock);
2406   // Emit the 'else' code if present.
2407   // There is no need to emit line number for unconditional branch.
2408   (void)ApplyDebugLocation::CreateEmpty(CGF);
2409   CGF.EmitBlock(ElseBlock);
2410   ElseGen(CGF);
2411   // There is no need to emit line number for unconditional branch.
2412   (void)ApplyDebugLocation::CreateEmpty(CGF);
2413   CGF.EmitBranch(ContBlock);
2414   // Emit the continuation block for code after the if.
2415   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2416 }
2417 
2418 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2419                                        llvm::Value *OutlinedFn,
2420                                        ArrayRef<llvm::Value *> CapturedVars,
2421                                        const Expr *IfCond) {
2422   if (!CGF.HaveInsertPoint())
2423     return;
2424   auto *RTLoc = emitUpdateLocation(CGF, Loc);
2425   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2426                                                      PrePostActionTy &) {
2427     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2428     auto &RT = CGF.CGM.getOpenMPRuntime();
2429     llvm::Value *Args[] = {
2430         RTLoc,
2431         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2432         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2433     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2434     RealArgs.append(std::begin(Args), std::end(Args));
2435     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2436 
2437     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2438     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2439   };
2440   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2441                                                           PrePostActionTy &) {
2442     auto &RT = CGF.CGM.getOpenMPRuntime();
2443     auto ThreadID = RT.getThreadID(CGF, Loc);
2444     // Build calls:
2445     // __kmpc_serialized_parallel(&Loc, GTid);
2446     llvm::Value *Args[] = {RTLoc, ThreadID};
2447     CGF.EmitRuntimeCall(
2448         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2449 
2450     // OutlinedFn(&GTid, &zero, CapturedStruct);
2451     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2452     Address ZeroAddr =
2453         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
2454                              /*Name*/ ".zero.addr");
2455     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2456     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2457     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2458     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2459     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2460     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2461 
2462     // __kmpc_end_serialized_parallel(&Loc, GTid);
2463     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2464     CGF.EmitRuntimeCall(
2465         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2466         EndArgs);
2467   };
2468   if (IfCond)
2469     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2470   else {
2471     RegionCodeGenTy ThenRCG(ThenGen);
2472     ThenRCG(CGF);
2473   }
2474 }
2475 
2476 // If we're inside an (outlined) parallel region, use the region info's
2477 // thread-ID variable (it is passed in a first argument of the outlined function
2478 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2479 // regular serial code region, get thread ID by calling kmp_int32
2480 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2481 // return the address of that temp.
2482 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2483                                              SourceLocation Loc) {
2484   if (auto *OMPRegionInfo =
2485           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2486     if (OMPRegionInfo->getThreadIDVariable())
2487       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2488 
2489   auto ThreadID = getThreadID(CGF, Loc);
2490   auto Int32Ty =
2491       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2492   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2493   CGF.EmitStoreOfScalar(ThreadID,
2494                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2495 
2496   return ThreadIDTemp;
2497 }
2498 
2499 llvm::Constant *
2500 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2501                                              const llvm::Twine &Name) {
2502   SmallString<256> Buffer;
2503   llvm::raw_svector_ostream Out(Buffer);
2504   Out << Name;
2505   auto RuntimeName = Out.str();
2506   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2507   if (Elem.second) {
2508     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2509            "OMP internal variable has different type than requested");
2510     return &*Elem.second;
2511   }
2512 
2513   return Elem.second = new llvm::GlobalVariable(
2514              CGM.getModule(), Ty, /*IsConstant*/ false,
2515              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2516              Elem.first());
2517 }
2518 
2519 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2520   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2521   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2522 }
2523 
2524 namespace {
2525 /// Common pre(post)-action for different OpenMP constructs.
2526 class CommonActionTy final : public PrePostActionTy {
2527   llvm::Value *EnterCallee;
2528   ArrayRef<llvm::Value *> EnterArgs;
2529   llvm::Value *ExitCallee;
2530   ArrayRef<llvm::Value *> ExitArgs;
2531   bool Conditional;
2532   llvm::BasicBlock *ContBlock = nullptr;
2533 
2534 public:
2535   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2536                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2537                  bool Conditional = false)
2538       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2539         ExitArgs(ExitArgs), Conditional(Conditional) {}
2540   void Enter(CodeGenFunction &CGF) override {
2541     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2542     if (Conditional) {
2543       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2544       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2545       ContBlock = CGF.createBasicBlock("omp_if.end");
2546       // Generate the branch (If-stmt)
2547       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2548       CGF.EmitBlock(ThenBlock);
2549     }
2550   }
2551   void Done(CodeGenFunction &CGF) {
2552     // Emit the rest of blocks/branches
2553     CGF.EmitBranch(ContBlock);
2554     CGF.EmitBlock(ContBlock, true);
2555   }
2556   void Exit(CodeGenFunction &CGF) override {
2557     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2558   }
2559 };
2560 } // anonymous namespace
2561 
2562 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2563                                          StringRef CriticalName,
2564                                          const RegionCodeGenTy &CriticalOpGen,
2565                                          SourceLocation Loc, const Expr *Hint) {
2566   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2567   // CriticalOpGen();
2568   // __kmpc_end_critical(ident_t *, gtid, Lock);
2569   // Prepare arguments and build a call to __kmpc_critical
2570   if (!CGF.HaveInsertPoint())
2571     return;
2572   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2573                          getCriticalRegionLock(CriticalName)};
2574   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2575                                                 std::end(Args));
2576   if (Hint) {
2577     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2578         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2579   }
2580   CommonActionTy Action(
2581       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2582                                  : OMPRTL__kmpc_critical),
2583       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2584   CriticalOpGen.setAction(Action);
2585   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2586 }
2587 
2588 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2589                                        const RegionCodeGenTy &MasterOpGen,
2590                                        SourceLocation Loc) {
2591   if (!CGF.HaveInsertPoint())
2592     return;
2593   // if(__kmpc_master(ident_t *, gtid)) {
2594   //   MasterOpGen();
2595   //   __kmpc_end_master(ident_t *, gtid);
2596   // }
2597   // Prepare arguments and build a call to __kmpc_master
2598   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2599   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2600                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2601                         /*Conditional=*/true);
2602   MasterOpGen.setAction(Action);
2603   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2604   Action.Done(CGF);
2605 }
2606 
2607 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2608                                         SourceLocation Loc) {
2609   if (!CGF.HaveInsertPoint())
2610     return;
2611   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2612   llvm::Value *Args[] = {
2613       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2614       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2615   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2616   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2617     Region->emitUntiedSwitch(CGF);
2618 }
2619 
2620 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2621                                           const RegionCodeGenTy &TaskgroupOpGen,
2622                                           SourceLocation Loc) {
2623   if (!CGF.HaveInsertPoint())
2624     return;
2625   // __kmpc_taskgroup(ident_t *, gtid);
2626   // TaskgroupOpGen();
2627   // __kmpc_end_taskgroup(ident_t *, gtid);
2628   // Prepare arguments and build a call to __kmpc_taskgroup
2629   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2630   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2631                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2632                         Args);
2633   TaskgroupOpGen.setAction(Action);
2634   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2635 }
2636 
2637 /// Given an array of pointers to variables, project the address of a
2638 /// given variable.
2639 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2640                                       unsigned Index, const VarDecl *Var) {
2641   // Pull out the pointer to the variable.
2642   Address PtrAddr =
2643       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2644   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2645 
2646   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2647   Addr = CGF.Builder.CreateElementBitCast(
2648       Addr, CGF.ConvertTypeForMem(Var->getType()));
2649   return Addr;
2650 }
2651 
2652 static llvm::Value *emitCopyprivateCopyFunction(
2653     CodeGenModule &CGM, llvm::Type *ArgsType,
2654     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2655     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2656   auto &C = CGM.getContext();
2657   // void copy_func(void *LHSArg, void *RHSArg);
2658   FunctionArgList Args;
2659   ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2660   ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2661   Args.push_back(&LHSArg);
2662   Args.push_back(&RHSArg);
2663   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2664   auto *Fn = llvm::Function::Create(
2665       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2666       ".omp.copyprivate.copy_func", &CGM.getModule());
2667   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2668   CodeGenFunction CGF(CGM);
2669   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2670   // Dest = (void*[n])(LHSArg);
2671   // Src = (void*[n])(RHSArg);
2672   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2673       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2674       ArgsType), CGF.getPointerAlign());
2675   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2676       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2677       ArgsType), CGF.getPointerAlign());
2678   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2679   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2680   // ...
2681   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2682   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2683     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2684     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2685 
2686     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2687     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2688 
2689     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2690     QualType Type = VD->getType();
2691     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2692   }
2693   CGF.FinishFunction();
2694   return Fn;
2695 }
2696 
2697 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2698                                        const RegionCodeGenTy &SingleOpGen,
2699                                        SourceLocation Loc,
2700                                        ArrayRef<const Expr *> CopyprivateVars,
2701                                        ArrayRef<const Expr *> SrcExprs,
2702                                        ArrayRef<const Expr *> DstExprs,
2703                                        ArrayRef<const Expr *> AssignmentOps) {
2704   if (!CGF.HaveInsertPoint())
2705     return;
2706   assert(CopyprivateVars.size() == SrcExprs.size() &&
2707          CopyprivateVars.size() == DstExprs.size() &&
2708          CopyprivateVars.size() == AssignmentOps.size());
2709   auto &C = CGM.getContext();
2710   // int32 did_it = 0;
2711   // if(__kmpc_single(ident_t *, gtid)) {
2712   //   SingleOpGen();
2713   //   __kmpc_end_single(ident_t *, gtid);
2714   //   did_it = 1;
2715   // }
2716   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2717   // <copy_func>, did_it);
2718 
2719   Address DidIt = Address::invalid();
2720   if (!CopyprivateVars.empty()) {
2721     // int32 did_it = 0;
2722     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2723     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2724     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2725   }
2726   // Prepare arguments and build a call to __kmpc_single
2727   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2728   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2729                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2730                         /*Conditional=*/true);
2731   SingleOpGen.setAction(Action);
2732   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2733   if (DidIt.isValid()) {
2734     // did_it = 1;
2735     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2736   }
2737   Action.Done(CGF);
2738   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2739   // <copy_func>, did_it);
2740   if (DidIt.isValid()) {
2741     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2742     auto CopyprivateArrayTy =
2743         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2744                                /*IndexTypeQuals=*/0);
2745     // Create a list of all private variables for copyprivate.
2746     Address CopyprivateList =
2747         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2748     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2749       Address Elem = CGF.Builder.CreateConstArrayGEP(
2750           CopyprivateList, I, CGF.getPointerSize());
2751       CGF.Builder.CreateStore(
2752           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2753               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2754           Elem);
2755     }
2756     // Build function that copies private values from single region to all other
2757     // threads in the corresponding parallel region.
2758     auto *CpyFn = emitCopyprivateCopyFunction(
2759         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2760         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2761     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2762     Address CL =
2763       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2764                                                       CGF.VoidPtrTy);
2765     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2766     llvm::Value *Args[] = {
2767         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2768         getThreadID(CGF, Loc),        // i32 <gtid>
2769         BufSize,                      // size_t <buf_size>
2770         CL.getPointer(),              // void *<copyprivate list>
2771         CpyFn,                        // void (*) (void *, void *) <copy_func>
2772         DidItVal                      // i32 did_it
2773     };
2774     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2775   }
2776 }
2777 
2778 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2779                                         const RegionCodeGenTy &OrderedOpGen,
2780                                         SourceLocation Loc, bool IsThreads) {
2781   if (!CGF.HaveInsertPoint())
2782     return;
2783   // __kmpc_ordered(ident_t *, gtid);
2784   // OrderedOpGen();
2785   // __kmpc_end_ordered(ident_t *, gtid);
2786   // Prepare arguments and build a call to __kmpc_ordered
2787   if (IsThreads) {
2788     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2789     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2790                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2791                           Args);
2792     OrderedOpGen.setAction(Action);
2793     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2794     return;
2795   }
2796   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2797 }
2798 
2799 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2800                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2801                                       bool ForceSimpleCall) {
2802   if (!CGF.HaveInsertPoint())
2803     return;
2804   // Build call __kmpc_cancel_barrier(loc, thread_id);
2805   // Build call __kmpc_barrier(loc, thread_id);
2806   unsigned Flags;
2807   if (Kind == OMPD_for)
2808     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2809   else if (Kind == OMPD_sections)
2810     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2811   else if (Kind == OMPD_single)
2812     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2813   else if (Kind == OMPD_barrier)
2814     Flags = OMP_IDENT_BARRIER_EXPL;
2815   else
2816     Flags = OMP_IDENT_BARRIER_IMPL;
2817   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2818   // thread_id);
2819   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2820                          getThreadID(CGF, Loc)};
2821   if (auto *OMPRegionInfo =
2822           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2823     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2824       auto *Result = CGF.EmitRuntimeCall(
2825           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2826       if (EmitChecks) {
2827         // if (__kmpc_cancel_barrier()) {
2828         //   exit from construct;
2829         // }
2830         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2831         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2832         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2833         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2834         CGF.EmitBlock(ExitBB);
2835         //   exit from construct;
2836         auto CancelDestination =
2837             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2838         CGF.EmitBranchThroughCleanup(CancelDestination);
2839         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2840       }
2841       return;
2842     }
2843   }
2844   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2845 }
2846 
2847 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2848 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2849                                           bool Chunked, bool Ordered) {
2850   switch (ScheduleKind) {
2851   case OMPC_SCHEDULE_static:
2852     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2853                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2854   case OMPC_SCHEDULE_dynamic:
2855     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2856   case OMPC_SCHEDULE_guided:
2857     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2858   case OMPC_SCHEDULE_runtime:
2859     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2860   case OMPC_SCHEDULE_auto:
2861     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2862   case OMPC_SCHEDULE_unknown:
2863     assert(!Chunked && "chunk was specified but schedule kind not known");
2864     return Ordered ? OMP_ord_static : OMP_sch_static;
2865   }
2866   llvm_unreachable("Unexpected runtime schedule");
2867 }
2868 
2869 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2870 static OpenMPSchedType
2871 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2872   // only static is allowed for dist_schedule
2873   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2874 }
2875 
2876 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2877                                          bool Chunked) const {
2878   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2879   return Schedule == OMP_sch_static;
2880 }
2881 
2882 bool CGOpenMPRuntime::isStaticNonchunked(
2883     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2884   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2885   return Schedule == OMP_dist_sch_static;
2886 }
2887 
2888 
2889 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2890   auto Schedule =
2891       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2892   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2893   return Schedule != OMP_sch_static;
2894 }
2895 
2896 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2897                                   OpenMPScheduleClauseModifier M1,
2898                                   OpenMPScheduleClauseModifier M2) {
2899   int Modifier = 0;
2900   switch (M1) {
2901   case OMPC_SCHEDULE_MODIFIER_monotonic:
2902     Modifier = OMP_sch_modifier_monotonic;
2903     break;
2904   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2905     Modifier = OMP_sch_modifier_nonmonotonic;
2906     break;
2907   case OMPC_SCHEDULE_MODIFIER_simd:
2908     if (Schedule == OMP_sch_static_chunked)
2909       Schedule = OMP_sch_static_balanced_chunked;
2910     break;
2911   case OMPC_SCHEDULE_MODIFIER_last:
2912   case OMPC_SCHEDULE_MODIFIER_unknown:
2913     break;
2914   }
2915   switch (M2) {
2916   case OMPC_SCHEDULE_MODIFIER_monotonic:
2917     Modifier = OMP_sch_modifier_monotonic;
2918     break;
2919   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2920     Modifier = OMP_sch_modifier_nonmonotonic;
2921     break;
2922   case OMPC_SCHEDULE_MODIFIER_simd:
2923     if (Schedule == OMP_sch_static_chunked)
2924       Schedule = OMP_sch_static_balanced_chunked;
2925     break;
2926   case OMPC_SCHEDULE_MODIFIER_last:
2927   case OMPC_SCHEDULE_MODIFIER_unknown:
2928     break;
2929   }
2930   return Schedule | Modifier;
2931 }
2932 
2933 void CGOpenMPRuntime::emitForDispatchInit(
2934     CodeGenFunction &CGF, SourceLocation Loc,
2935     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2936     bool Ordered, const DispatchRTInput &DispatchValues) {
2937   if (!CGF.HaveInsertPoint())
2938     return;
2939   OpenMPSchedType Schedule = getRuntimeSchedule(
2940       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2941   assert(Ordered ||
2942          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2943           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2944           Schedule != OMP_sch_static_balanced_chunked));
2945   // Call __kmpc_dispatch_init(
2946   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2947   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2948   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2949 
2950   // If the Chunk was not specified in the clause - use default value 1.
2951   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2952                                             : CGF.Builder.getIntN(IVSize, 1);
2953   llvm::Value *Args[] = {
2954       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2955       CGF.Builder.getInt32(addMonoNonMonoModifier(
2956           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2957       DispatchValues.LB,                                // Lower
2958       DispatchValues.UB,                                // Upper
2959       CGF.Builder.getIntN(IVSize, 1),                   // Stride
2960       Chunk                                             // Chunk
2961   };
2962   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2963 }
2964 
2965 static void emitForStaticInitCall(
2966     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2967     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2968     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2969     const CGOpenMPRuntime::StaticRTInput &Values) {
2970   if (!CGF.HaveInsertPoint())
2971     return;
2972 
2973   assert(!Values.Ordered);
2974   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2975          Schedule == OMP_sch_static_balanced_chunked ||
2976          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2977          Schedule == OMP_dist_sch_static ||
2978          Schedule == OMP_dist_sch_static_chunked);
2979 
2980   // Call __kmpc_for_static_init(
2981   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2982   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2983   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2984   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2985   llvm::Value *Chunk = Values.Chunk;
2986   if (Chunk == nullptr) {
2987     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2988             Schedule == OMP_dist_sch_static) &&
2989            "expected static non-chunked schedule");
2990     // If the Chunk was not specified in the clause - use default value 1.
2991     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2992   } else {
2993     assert((Schedule == OMP_sch_static_chunked ||
2994             Schedule == OMP_sch_static_balanced_chunked ||
2995             Schedule == OMP_ord_static_chunked ||
2996             Schedule == OMP_dist_sch_static_chunked) &&
2997            "expected static chunked schedule");
2998   }
2999   llvm::Value *Args[] = {
3000       UpdateLocation,
3001       ThreadId,
3002       CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3003                                                   M2)), // Schedule type
3004       Values.IL.getPointer(),                           // &isLastIter
3005       Values.LB.getPointer(),                           // &LB
3006       Values.UB.getPointer(),                           // &UB
3007       Values.ST.getPointer(),                           // &Stride
3008       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3009       Chunk                                             // Chunk
3010   };
3011   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3012 }
3013 
3014 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3015                                         SourceLocation Loc,
3016                                         OpenMPDirectiveKind DKind,
3017                                         const OpenMPScheduleTy &ScheduleKind,
3018                                         const StaticRTInput &Values) {
3019   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3020       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3021   assert(isOpenMPWorksharingDirective(DKind) &&
3022          "Expected loop-based or sections-based directive.");
3023   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3024                                              isOpenMPLoopDirective(DKind)
3025                                                  ? OMP_IDENT_WORK_LOOP
3026                                                  : OMP_IDENT_WORK_SECTIONS);
3027   auto *ThreadId = getThreadID(CGF, Loc);
3028   auto *StaticInitFunction =
3029       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3030   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3031                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3032 }
3033 
3034 void CGOpenMPRuntime::emitDistributeStaticInit(
3035     CodeGenFunction &CGF, SourceLocation Loc,
3036     OpenMPDistScheduleClauseKind SchedKind,
3037     const CGOpenMPRuntime::StaticRTInput &Values) {
3038   OpenMPSchedType ScheduleNum =
3039       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3040   auto *UpdatedLocation =
3041       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3042   auto *ThreadId = getThreadID(CGF, Loc);
3043   auto *StaticInitFunction =
3044       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3045   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3046                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3047                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3048 }
3049 
3050 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3051                                           SourceLocation Loc,
3052                                           OpenMPDirectiveKind DKind) {
3053   if (!CGF.HaveInsertPoint())
3054     return;
3055   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3056   llvm::Value *Args[] = {
3057       emitUpdateLocation(CGF, Loc,
3058                          isOpenMPDistributeDirective(DKind)
3059                              ? OMP_IDENT_WORK_DISTRIBUTE
3060                              : isOpenMPLoopDirective(DKind)
3061                                    ? OMP_IDENT_WORK_LOOP
3062                                    : OMP_IDENT_WORK_SECTIONS),
3063       getThreadID(CGF, Loc)};
3064   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3065                       Args);
3066 }
3067 
3068 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3069                                                  SourceLocation Loc,
3070                                                  unsigned IVSize,
3071                                                  bool IVSigned) {
3072   if (!CGF.HaveInsertPoint())
3073     return;
3074   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3075   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3076   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3077 }
3078 
3079 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3080                                           SourceLocation Loc, unsigned IVSize,
3081                                           bool IVSigned, Address IL,
3082                                           Address LB, Address UB,
3083                                           Address ST) {
3084   // Call __kmpc_dispatch_next(
3085   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3086   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3087   //          kmp_int[32|64] *p_stride);
3088   llvm::Value *Args[] = {
3089       emitUpdateLocation(CGF, Loc),
3090       getThreadID(CGF, Loc),
3091       IL.getPointer(), // &isLastIter
3092       LB.getPointer(), // &Lower
3093       UB.getPointer(), // &Upper
3094       ST.getPointer()  // &Stride
3095   };
3096   llvm::Value *Call =
3097       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3098   return CGF.EmitScalarConversion(
3099       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
3100       CGF.getContext().BoolTy, Loc);
3101 }
3102 
3103 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3104                                            llvm::Value *NumThreads,
3105                                            SourceLocation Loc) {
3106   if (!CGF.HaveInsertPoint())
3107     return;
3108   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3109   llvm::Value *Args[] = {
3110       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3111       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3112   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3113                       Args);
3114 }
3115 
3116 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3117                                          OpenMPProcBindClauseKind ProcBind,
3118                                          SourceLocation Loc) {
3119   if (!CGF.HaveInsertPoint())
3120     return;
3121   // Constants for proc bind value accepted by the runtime.
3122   enum ProcBindTy {
3123     ProcBindFalse = 0,
3124     ProcBindTrue,
3125     ProcBindMaster,
3126     ProcBindClose,
3127     ProcBindSpread,
3128     ProcBindIntel,
3129     ProcBindDefault
3130   } RuntimeProcBind;
3131   switch (ProcBind) {
3132   case OMPC_PROC_BIND_master:
3133     RuntimeProcBind = ProcBindMaster;
3134     break;
3135   case OMPC_PROC_BIND_close:
3136     RuntimeProcBind = ProcBindClose;
3137     break;
3138   case OMPC_PROC_BIND_spread:
3139     RuntimeProcBind = ProcBindSpread;
3140     break;
3141   case OMPC_PROC_BIND_unknown:
3142     llvm_unreachable("Unsupported proc_bind value.");
3143   }
3144   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3145   llvm::Value *Args[] = {
3146       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3147       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3148   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3149 }
3150 
3151 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3152                                 SourceLocation Loc) {
3153   if (!CGF.HaveInsertPoint())
3154     return;
3155   // Build call void __kmpc_flush(ident_t *loc)
3156   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3157                       emitUpdateLocation(CGF, Loc));
3158 }
3159 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they number the fields
/// consecutively starting from 0 in the order listed here.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  Data1,
  /// \brief Task priority.
  Data2,
  /// \brief (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// \brief (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// \brief (Taskloops only) Stride.
  KmpTaskTStride,
  /// \brief (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// \brief (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3185 
3186 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3187   // FIXME: Add other entries type when they become supported.
3188   return OffloadEntriesTargetRegion.empty();
3189 }
3190 
3191 /// \brief Initialize target region entry.
3192 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3193     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3194                                     StringRef ParentName, unsigned LineNum,
3195                                     unsigned Order) {
3196   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3197                                              "only required for the device "
3198                                              "code generation.");
3199   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3200       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3201                                    /*Flags=*/0);
3202   ++OffloadingEntriesNum;
3203 }
3204 
3205 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3206     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3207                                   StringRef ParentName, unsigned LineNum,
3208                                   llvm::Constant *Addr, llvm::Constant *ID,
3209                                   int32_t Flags) {
3210   // If we are emitting code for a target, the entry is already initialized,
3211   // only has to be registered.
3212   if (CGM.getLangOpts().OpenMPIsDevice) {
3213     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3214            "Entry must exist.");
3215     auto &Entry =
3216         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3217     assert(Entry.isValid() && "Entry not initialized!");
3218     Entry.setAddress(Addr);
3219     Entry.setID(ID);
3220     Entry.setFlags(Flags);
3221     return;
3222   } else {
3223     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
3224     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3225   }
3226 }
3227 
3228 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3229     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3230     unsigned LineNum) const {
3231   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3232   if (PerDevice == OffloadEntriesTargetRegion.end())
3233     return false;
3234   auto PerFile = PerDevice->second.find(FileID);
3235   if (PerFile == PerDevice->second.end())
3236     return false;
3237   auto PerParentName = PerFile->second.find(ParentName);
3238   if (PerParentName == PerFile->second.end())
3239     return false;
3240   auto PerLine = PerParentName->second.find(LineNum);
3241   if (PerLine == PerParentName->second.end())
3242     return false;
3243   // Fail if this entry is already registered.
3244   if (PerLine->second.getAddress() || PerLine->second.getID())
3245     return false;
3246   return true;
3247 }
3248 
3249 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3250     const OffloadTargetRegionEntryInfoActTy &Action) {
3251   // Scan all target region entries and perform the provided action.
3252   for (auto &D : OffloadEntriesTargetRegion)
3253     for (auto &F : D.second)
3254       for (auto &P : F.second)
3255         for (auto &L : P.second)
3256           Action(D.first, F.first, P.first(), L.first, L.second);
3257 }
3258 
3259 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
3260 /// \a Codegen. This is used to emit the two functions that register and
3261 /// unregister the descriptor of the current compilation unit.
3262 static llvm::Function *
3263 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
3264                                          const RegionCodeGenTy &Codegen) {
3265   auto &C = CGM.getContext();
3266   FunctionArgList Args;
3267   ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3268   Args.push_back(&DummyPtr);
3269 
3270   CodeGenFunction CGF(CGM);
3271   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3272   auto FTy = CGM.getTypes().GetFunctionType(FI);
3273   auto *Fn =
3274       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
3275   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
3276   Codegen(CGF);
3277   CGF.FinishFunction();
3278   return Fn;
3279 }
3280 
3281 llvm::Function *
3282 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3283 
3284   // If we don't have entries or if we are emitting code for the device, we
3285   // don't need to do anything.
3286   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3287     return nullptr;
3288 
3289   auto &M = CGM.getModule();
3290   auto &C = CGM.getContext();
3291 
3292   // Get list of devices we care about
3293   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
3294 
3295   // We should be creating an offloading descriptor only if there are devices
3296   // specified.
3297   assert(!Devices.empty() && "No OpenMP offloading devices??");
3298 
3299   // Create the external variables that will point to the begin and end of the
3300   // host entries section. These will be defined by the linker.
3301   auto *OffloadEntryTy =
3302       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3303   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
3304       M, OffloadEntryTy, /*isConstant=*/true,
3305       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3306       ".omp_offloading.entries_begin");
3307   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
3308       M, OffloadEntryTy, /*isConstant=*/true,
3309       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3310       ".omp_offloading.entries_end");
3311 
3312   // Create all device images
3313   auto *DeviceImageTy = cast<llvm::StructType>(
3314       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3315   ConstantInitBuilder DeviceImagesBuilder(CGM);
3316   auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
3317 
3318   for (unsigned i = 0; i < Devices.size(); ++i) {
3319     StringRef T = Devices[i].getTriple();
3320     auto *ImgBegin = new llvm::GlobalVariable(
3321         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3322         /*Initializer=*/nullptr,
3323         Twine(".omp_offloading.img_start.") + Twine(T));
3324     auto *ImgEnd = new llvm::GlobalVariable(
3325         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3326         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
3327 
3328     auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
3329     Dev.add(ImgBegin);
3330     Dev.add(ImgEnd);
3331     Dev.add(HostEntriesBegin);
3332     Dev.add(HostEntriesEnd);
3333     Dev.finishAndAddTo(DeviceImagesEntries);
3334   }
3335 
3336   // Create device images global array.
3337   llvm::GlobalVariable *DeviceImages =
3338     DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
3339                                               CGM.getPointerAlign(),
3340                                               /*isConstant=*/true);
3341   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3342 
3343   // This is a Zero array to be used in the creation of the constant expressions
3344   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3345                              llvm::Constant::getNullValue(CGM.Int32Ty)};
3346 
3347   // Create the target region descriptor.
3348   auto *BinaryDescriptorTy = cast<llvm::StructType>(
3349       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
3350   ConstantInitBuilder DescBuilder(CGM);
3351   auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
3352   DescInit.addInt(CGM.Int32Ty, Devices.size());
3353   DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3354                                                     DeviceImages,
3355                                                     Index));
3356   DescInit.add(HostEntriesBegin);
3357   DescInit.add(HostEntriesEnd);
3358 
3359   auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
3360                                               CGM.getPointerAlign(),
3361                                               /*isConstant=*/true);
3362 
3363   // Emit code to register or unregister the descriptor at execution
3364   // startup or closing, respectively.
3365 
3366   // Create a variable to drive the registration and unregistration of the
3367   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3368   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
3369   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
3370                                 IdentInfo, C.CharTy, ImplicitParamDecl::Other);
3371 
3372   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
3373       CGM, ".omp_offloading.descriptor_unreg",
3374       [&](CodeGenFunction &CGF, PrePostActionTy &) {
3375         CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
3376                             Desc);
3377       });
3378   auto *RegFn = createOffloadingBinaryDescriptorFunction(
3379       CGM, ".omp_offloading.descriptor_reg",
3380       [&](CodeGenFunction &CGF, PrePostActionTy &) {
3381         CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib),
3382                             Desc);
3383         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3384       });
3385   if (CGM.supportsCOMDAT()) {
3386     // It is sufficient to call registration function only once, so create a
3387     // COMDAT group for registration/unregistration functions and associated
3388     // data. That would reduce startup time and code size. Registration
3389     // function serves as a COMDAT group key.
3390     auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
3391     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3392     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3393     RegFn->setComdat(ComdatKey);
3394     UnRegFn->setComdat(ComdatKey);
3395     DeviceImages->setComdat(ComdatKey);
3396     Desc->setComdat(ComdatKey);
3397   }
3398   return RegFn;
3399 }
3400 
3401 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
3402                                          llvm::Constant *Addr, uint64_t Size,
3403                                          int32_t Flags) {
3404   StringRef Name = Addr->getName();
3405   auto *TgtOffloadEntryType = cast<llvm::StructType>(
3406       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
3407   llvm::LLVMContext &C = CGM.getModule().getContext();
3408   llvm::Module &M = CGM.getModule();
3409 
3410   // Make sure the address has the right type.
3411   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
3412 
3413   // Create constant string with the name.
3414   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3415 
3416   llvm::GlobalVariable *Str =
3417       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
3418                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
3419                                ".omp_offloading.entry_name");
3420   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3421   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
3422 
3423   // We can't have any padding between symbols, so we need to have 1-byte
3424   // alignment.
3425   auto Align = CharUnits::fromQuantity(1);
3426 
3427   // Create the entry struct.
3428   ConstantInitBuilder EntryBuilder(CGM);
3429   auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
3430   EntryInit.add(AddrPtr);
3431   EntryInit.add(StrPtr);
3432   EntryInit.addInt(CGM.SizeTy, Size);
3433   EntryInit.addInt(CGM.Int32Ty, Flags);
3434   EntryInit.addInt(CGM.Int32Ty, 0);
3435   llvm::GlobalVariable *Entry =
3436     EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
3437                                     Align,
3438                                     /*constant*/ true,
3439                                     llvm::GlobalValue::ExternalLinkage);
3440 
3441   // The entry has to be created in the section the linker expects it to be.
3442   Entry->setSection(".omp_offloading.entries");
3443 }
3444 
3445 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3446   // Emit the offloading entries and metadata so that the device codegen side
3447   // can easily figure out what to emit. The produced metadata looks like
3448   // this:
3449   //
3450   // !omp_offload.info = !{!1, ...}
3451   //
3452   // Right now we only generate metadata for function that contain target
3453   // regions.
3454 
3455   // If we do not have entries, we dont need to do anything.
3456   if (OffloadEntriesInfoManager.empty())
3457     return;
3458 
3459   llvm::Module &M = CGM.getModule();
3460   llvm::LLVMContext &C = M.getContext();
3461   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
3462       OrderedEntries(OffloadEntriesInfoManager.size());
3463 
3464   // Create the offloading info metadata node.
3465   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3466 
3467   // Auxiliary methods to create metadata values and strings.
3468   auto getMDInt = [&](unsigned v) {
3469     return llvm::ConstantAsMetadata::get(
3470         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
3471   };
3472 
3473   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
3474 
3475   // Create function that emits metadata for each target region entry;
3476   auto &&TargetRegionMetadataEmitter = [&](
3477       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
3478       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3479     llvm::SmallVector<llvm::Metadata *, 32> Ops;
3480     // Generate metadata for target regions. Each entry of this metadata
3481     // contains:
3482     // - Entry 0 -> Kind of this type of metadata (0).
3483     // - Entry 1 -> Device ID of the file where the entry was identified.
3484     // - Entry 2 -> File ID of the file where the entry was identified.
3485     // - Entry 3 -> Mangled name of the function where the entry was identified.
3486     // - Entry 4 -> Line in the file where the entry was identified.
3487     // - Entry 5 -> Order the entry was created.
3488     // The first element of the metadata node is the kind.
3489     Ops.push_back(getMDInt(E.getKind()));
3490     Ops.push_back(getMDInt(DeviceID));
3491     Ops.push_back(getMDInt(FileID));
3492     Ops.push_back(getMDString(ParentName));
3493     Ops.push_back(getMDInt(Line));
3494     Ops.push_back(getMDInt(E.getOrder()));
3495 
3496     // Save this entry in the right position of the ordered entries array.
3497     OrderedEntries[E.getOrder()] = &E;
3498 
3499     // Add metadata to the named metadata node.
3500     MD->addOperand(llvm::MDNode::get(C, Ops));
3501   };
3502 
3503   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3504       TargetRegionMetadataEmitter);
3505 
3506   for (auto *E : OrderedEntries) {
3507     assert(E && "All ordered entries must exist!");
3508     if (auto *CE =
3509             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3510                 E)) {
3511       assert(CE->getID() && CE->getAddress() &&
3512              "Entry ID and Addr are invalid!");
3513       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
3514     } else
3515       llvm_unreachable("Unsupported entry kind.");
3516   }
3517 }
3518 
3519 /// \brief Loads all the offload entries information from the host IR
3520 /// metadata.
3521 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3522   // If we are in target mode, load the metadata from the host IR. This code has
3523   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3524 
3525   if (!CGM.getLangOpts().OpenMPIsDevice)
3526     return;
3527 
3528   if (CGM.getLangOpts().OMPHostIRFile.empty())
3529     return;
3530 
3531   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3532   if (Buf.getError())
3533     return;
3534 
3535   llvm::LLVMContext C;
3536   auto ME = expectedToErrorOrAndEmitErrors(
3537       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3538 
3539   if (ME.getError())
3540     return;
3541 
3542   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3543   if (!MD)
3544     return;
3545 
3546   for (auto I : MD->operands()) {
3547     llvm::MDNode *MN = cast<llvm::MDNode>(I);
3548 
3549     auto getMDInt = [&](unsigned Idx) {
3550       llvm::ConstantAsMetadata *V =
3551           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3552       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3553     };
3554 
3555     auto getMDString = [&](unsigned Idx) {
3556       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3557       return V->getString();
3558     };
3559 
3560     switch (getMDInt(0)) {
3561     default:
3562       llvm_unreachable("Unexpected metadata!");
3563       break;
3564     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3565         OFFLOAD_ENTRY_INFO_TARGET_REGION:
3566       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3567           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3568           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3569           /*Order=*/getMDInt(5));
3570       break;
3571     }
3572   }
3573 }
3574 
3575 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3576   if (!KmpRoutineEntryPtrTy) {
3577     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3578     auto &C = CGM.getContext();
3579     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3580     FunctionProtoType::ExtProtoInfo EPI;
3581     KmpRoutineEntryPtrQTy = C.getPointerType(
3582         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3583     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3584   }
3585 }
3586 
3587 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3588                                        QualType FieldTy) {
3589   auto *Field = FieldDecl::Create(
3590       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3591       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3592       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3593   Field->setAccess(AS_public);
3594   DC->addDecl(Field);
3595   return Field;
3596 }
3597 
3598 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3599 
3600   // Make sure the type of the entry is already created. This is the type we
3601   // have to create:
3602   // struct __tgt_offload_entry{
3603   //   void      *addr;       // Pointer to the offload entry info.
3604   //                          // (function or global)
3605   //   char      *name;       // Name of the function or global.
3606   //   size_t     size;       // Size of the entry info (0 if it a function).
3607   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3608   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3609   // };
3610   if (TgtOffloadEntryQTy.isNull()) {
3611     ASTContext &C = CGM.getContext();
3612     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3613     RD->startDefinition();
3614     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3615     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3616     addFieldToRecordDecl(C, RD, C.getSizeType());
3617     addFieldToRecordDecl(
3618         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3619     addFieldToRecordDecl(
3620         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3621     RD->completeDefinition();
3622     TgtOffloadEntryQTy = C.getRecordType(RD);
3623   }
3624   return TgtOffloadEntryQTy;
3625 }
3626 
3627 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3628   // These are the types we need to build:
3629   // struct __tgt_device_image{
3630   // void   *ImageStart;       // Pointer to the target code start.
3631   // void   *ImageEnd;         // Pointer to the target code end.
3632   // // We also add the host entries to the device image, as it may be useful
3633   // // for the target runtime to have access to that information.
3634   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3635   //                                       // the entries.
3636   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3637   //                                       // entries (non inclusive).
3638   // };
3639   if (TgtDeviceImageQTy.isNull()) {
3640     ASTContext &C = CGM.getContext();
3641     auto *RD = C.buildImplicitRecord("__tgt_device_image");
3642     RD->startDefinition();
3643     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3644     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3645     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3646     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3647     RD->completeDefinition();
3648     TgtDeviceImageQTy = C.getRecordType(RD);
3649   }
3650   return TgtDeviceImageQTy;
3651 }
3652 
3653 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3654   // struct __tgt_bin_desc{
3655   //   int32_t              NumDevices;      // Number of devices supported.
3656   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
3657   //                                         // (one per device).
3658   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3659   //                                         // entries.
3660   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3661   //                                         // entries (non inclusive).
3662   // };
3663   if (TgtBinaryDescriptorQTy.isNull()) {
3664     ASTContext &C = CGM.getContext();
3665     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3666     RD->startDefinition();
3667     addFieldToRecordDecl(
3668         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3669     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3670     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3671     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3672     RD->completeDefinition();
3673     TgtBinaryDescriptorQTy = C.getRecordType(RD);
3674   }
3675   return TgtBinaryDescriptorQTy;
3676 }
3677 
3678 namespace {
3679 struct PrivateHelpersTy {
3680   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3681                    const VarDecl *PrivateElemInit)
3682       : Original(Original), PrivateCopy(PrivateCopy),
3683         PrivateElemInit(PrivateElemInit) {}
3684   const VarDecl *Original;
3685   const VarDecl *PrivateCopy;
3686   const VarDecl *PrivateElemInit;
3687 };
3688 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3689 } // anonymous namespace
3690 
3691 static RecordDecl *
3692 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3693   if (!Privates.empty()) {
3694     auto &C = CGM.getContext();
3695     // Build struct .kmp_privates_t. {
3696     //         /*  private vars  */
3697     //       };
3698     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3699     RD->startDefinition();
3700     for (auto &&Pair : Privates) {
3701       auto *VD = Pair.second.Original;
3702       auto Type = VD->getType();
3703       Type = Type.getNonReferenceType();
3704       auto *FD = addFieldToRecordDecl(C, RD, Type);
3705       if (VD->hasAttrs()) {
3706         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3707              E(VD->getAttrs().end());
3708              I != E; ++I)
3709           FD->addAttr(*I);
3710       }
3711     }
3712     RD->completeDefinition();
3713     return RD;
3714   }
3715   return nullptr;
3716 }
3717 
3718 static RecordDecl *
3719 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3720                          QualType KmpInt32Ty,
3721                          QualType KmpRoutineEntryPointerQTy) {
3722   auto &C = CGM.getContext();
3723   // Build struct kmp_task_t {
3724   //         void *              shareds;
3725   //         kmp_routine_entry_t routine;
3726   //         kmp_int32           part_id;
3727   //         kmp_cmplrdata_t data1;
3728   //         kmp_cmplrdata_t data2;
3729   // For taskloops additional fields:
3730   //         kmp_uint64          lb;
3731   //         kmp_uint64          ub;
3732   //         kmp_int64           st;
3733   //         kmp_int32           liter;
3734   //         void *              reductions;
3735   //       };
3736   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3737   UD->startDefinition();
3738   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3739   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3740   UD->completeDefinition();
3741   QualType KmpCmplrdataTy = C.getRecordType(UD);
3742   auto *RD = C.buildImplicitRecord("kmp_task_t");
3743   RD->startDefinition();
3744   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3745   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3746   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3747   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3748   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3749   if (isOpenMPTaskLoopDirective(Kind)) {
3750     QualType KmpUInt64Ty =
3751         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3752     QualType KmpInt64Ty =
3753         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3754     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3755     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3756     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3757     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3758     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3759   }
3760   RD->completeDefinition();
3761   return RD;
3762 }
3763 
3764 static RecordDecl *
3765 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3766                                      ArrayRef<PrivateDataTy> Privates) {
3767   auto &C = CGM.getContext();
3768   // Build struct kmp_task_t_with_privates {
3769   //         kmp_task_t task_data;
3770   //         .kmp_privates_t. privates;
3771   //       };
3772   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3773   RD->startDefinition();
3774   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3775   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3776     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3777   }
3778   RD->completeDefinition();
3779   return RD;
3780 }
3781 
3782 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3783 /// argument.
3784 /// \code
3785 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3786 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3787 ///   For taskloops:
3788 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3789 ///   tt->reductions, tt->shareds);
3790 ///   return 0;
3791 /// }
3792 /// \endcode
3793 static llvm::Value *
3794 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3795                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3796                       QualType KmpTaskTWithPrivatesPtrQTy,
3797                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3798                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
3799                       llvm::Value *TaskPrivatesMap) {
3800   auto &C = CGM.getContext();
3801   FunctionArgList Args;
3802   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3803                             ImplicitParamDecl::Other);
3804   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3805                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3806                                 ImplicitParamDecl::Other);
3807   Args.push_back(&GtidArg);
3808   Args.push_back(&TaskTypeArg);
3809   auto &TaskEntryFnInfo =
3810       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3811   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3812   auto *TaskEntry =
3813       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
3814                              ".omp_task_entry.", &CGM.getModule());
3815   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3816   CodeGenFunction CGF(CGM);
3817   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3818 
3819   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3820   // tt,
3821   // For taskloops:
3822   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3823   // tt->task_data.shareds);
3824   auto *GtidParam = CGF.EmitLoadOfScalar(
3825       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3826   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3827       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3828       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3829   auto *KmpTaskTWithPrivatesQTyRD =
3830       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3831   LValue Base =
3832       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3833   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3834   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3835   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3836   auto *PartidParam = PartIdLVal.getPointer();
3837 
3838   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3839   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3840   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3841       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3842       CGF.ConvertTypeForMem(SharedsPtrTy));
3843 
3844   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3845   llvm::Value *PrivatesParam;
3846   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3847     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3848     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3849         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3850   } else
3851     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3852 
3853   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3854                                TaskPrivatesMap,
3855                                CGF.Builder
3856                                    .CreatePointerBitCastOrAddrSpaceCast(
3857                                        TDBase.getAddress(), CGF.VoidPtrTy)
3858                                    .getPointer()};
3859   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3860                                           std::end(CommonArgs));
3861   if (isOpenMPTaskLoopDirective(Kind)) {
3862     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3863     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3864     auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3865     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3866     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3867     auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3868     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3869     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3870     auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3871     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3872     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3873     auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3874     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3875     auto RLVal = CGF.EmitLValueForField(Base, *RFI);
3876     auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
3877     CallArgs.push_back(LBParam);
3878     CallArgs.push_back(UBParam);
3879     CallArgs.push_back(StParam);
3880     CallArgs.push_back(LIParam);
3881     CallArgs.push_back(RParam);
3882   }
3883   CallArgs.push_back(SharedsParam);
3884 
3885   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3886                                                   CallArgs);
3887   CGF.EmitStoreThroughLValue(
3888       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3889       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3890   CGF.FinishFunction();
3891   return TaskEntry;
3892 }
3893 
3894 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3895                                             SourceLocation Loc,
3896                                             QualType KmpInt32Ty,
3897                                             QualType KmpTaskTWithPrivatesPtrQTy,
3898                                             QualType KmpTaskTWithPrivatesQTy) {
3899   auto &C = CGM.getContext();
3900   FunctionArgList Args;
3901   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3902                             ImplicitParamDecl::Other);
3903   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3904                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3905                                 ImplicitParamDecl::Other);
3906   Args.push_back(&GtidArg);
3907   Args.push_back(&TaskTypeArg);
3908   FunctionType::ExtInfo Info;
3909   auto &DestructorFnInfo =
3910       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3911   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3912   auto *DestructorFn =
3913       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3914                              ".omp_task_destructor.", &CGM.getModule());
3915   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3916                                     DestructorFnInfo);
3917   CodeGenFunction CGF(CGM);
3918   CGF.disableDebugInfo();
3919   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3920                     Args);
3921 
3922   LValue Base = CGF.EmitLoadOfPointerLValue(
3923       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3924       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3925   auto *KmpTaskTWithPrivatesQTyRD =
3926       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3927   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3928   Base = CGF.EmitLValueForField(Base, *FI);
3929   for (auto *Field :
3930        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3931     if (auto DtorKind = Field->getType().isDestructedType()) {
3932       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3933       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3934     }
3935   }
3936   CGF.FinishFunction();
3937   return DestructorFn;
3938 }
3939 
3940 /// \brief Emit a privates mapping function for correct handling of private and
3941 /// firstprivate variables.
3942 /// \code
3943 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3944 /// **noalias priv1,...,  <tyn> **noalias privn) {
3945 ///   *priv1 = &.privates.priv1;
3946 ///   ...;
3947 ///   *privn = &.privates.privn;
3948 /// }
3949 /// \endcode
3950 static llvm::Value *
3951 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3952                                ArrayRef<const Expr *> PrivateVars,
3953                                ArrayRef<const Expr *> FirstprivateVars,
3954                                ArrayRef<const Expr *> LastprivateVars,
3955                                QualType PrivatesQTy,
3956                                ArrayRef<PrivateDataTy> Privates) {
3957   auto &C = CGM.getContext();
3958   FunctionArgList Args;
3959   ImplicitParamDecl TaskPrivatesArg(
3960       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3961       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3962       ImplicitParamDecl::Other);
3963   Args.push_back(&TaskPrivatesArg);
3964   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3965   unsigned Counter = 1;
3966   for (auto *E: PrivateVars) {
3967     Args.push_back(ImplicitParamDecl::Create(
3968         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3969         C.getPointerType(C.getPointerType(E->getType()))
3970             .withConst()
3971             .withRestrict(),
3972         ImplicitParamDecl::Other));
3973     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3974     PrivateVarsPos[VD] = Counter;
3975     ++Counter;
3976   }
3977   for (auto *E : FirstprivateVars) {
3978     Args.push_back(ImplicitParamDecl::Create(
3979         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3980         C.getPointerType(C.getPointerType(E->getType()))
3981             .withConst()
3982             .withRestrict(),
3983         ImplicitParamDecl::Other));
3984     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3985     PrivateVarsPos[VD] = Counter;
3986     ++Counter;
3987   }
3988   for (auto *E: LastprivateVars) {
3989     Args.push_back(ImplicitParamDecl::Create(
3990         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3991         C.getPointerType(C.getPointerType(E->getType()))
3992             .withConst()
3993             .withRestrict(),
3994         ImplicitParamDecl::Other));
3995     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3996     PrivateVarsPos[VD] = Counter;
3997     ++Counter;
3998   }
3999   auto &TaskPrivatesMapFnInfo =
4000       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4001   auto *TaskPrivatesMapTy =
4002       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4003   auto *TaskPrivatesMap = llvm::Function::Create(
4004       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
4005       ".omp_task_privates_map.", &CGM.getModule());
4006   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
4007                                     TaskPrivatesMapFnInfo);
4008   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4009   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4010   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4011   CodeGenFunction CGF(CGM);
4012   CGF.disableDebugInfo();
4013   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4014                     TaskPrivatesMapFnInfo, Args);
4015 
4016   // *privi = &.privates.privi;
4017   LValue Base = CGF.EmitLoadOfPointerLValue(
4018       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4019       TaskPrivatesArg.getType()->castAs<PointerType>());
4020   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4021   Counter = 0;
4022   for (auto *Field : PrivatesQTyRD->fields()) {
4023     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
4024     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4025     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4026     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4027         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4028     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4029     ++Counter;
4030   }
4031   CGF.FinishFunction();
4032   return TaskPrivatesMap;
4033 }
4034 
4035 static int array_pod_sort_comparator(const PrivateDataTy *P1,
4036                                      const PrivateDataTy *P2) {
4037   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
4038 }
4039 
4040 /// Emit initialization for private variables in task-based directives.
     ///
     /// Iterates over \p Privates in step with the fields of the privates record
     /// (the type of the second field of \p KmpTaskTWithPrivatesQTyRD) and emits
     /// each private copy's initializer into the matching field.
     /// \param KmpTaskSharedsPtr Address of the shareds; it is only read when
     /// \p Data has firstprivate variables.
     /// \param TDBase LValue whose second field is the privates record.
     /// \param ForDup If true, only initializers that are CXXConstructExprs and are
     /// not trivial are emitted; otherwise every available initializer is emitted.
4041 static void emitPrivatesInit(CodeGenFunction &CGF,
4042                              const OMPExecutableDirective &D,
4043                              Address KmpTaskSharedsPtr, LValue TDBase,
4044                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4045                              QualType SharedsTy, QualType SharedsPtrTy,
4046                              const OMPTaskDataTy &Data,
4047                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4048   auto &C = CGF.getContext();
       // The privates record is reached through the second field of the record.
4049   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4050   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
       // SrcBase views the shareds as SharedsTy. It is only set up when there are
       // firstprivate variables and is read below for entries that carry a
       // PrivateElemInit variable.
4051   LValue SrcBase;
4052   if (!Data.FirstprivateVars.empty()) {
4053     SrcBase = CGF.MakeAddrLValue(
4054         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4055             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4056         SharedsTy);
4057   }
       // Captured-statement info of the directive; used to look up the shareds
       // field that corresponds to an original variable.
4058   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
4059       cast<CapturedStmt>(*D.getAssociatedStmt()));
       // From here on FI walks the fields of the privates record itself.
4060   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4061   for (auto &&Pair : Privates) {
4062     auto *VD = Pair.second.PrivateCopy;
4063     auto *Init = VD->getAnyInitializer();
         // Skip copies without an initializer; in ForDup mode additionally skip
         // everything except non-trivial constructor calls.
4064     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4065                              !CGF.isTrivialInitializer(Init)))) {
4066       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4067       if (auto *Elem = Pair.second.PrivateElemInit) {
             // The copy is initialized from the original value stored in the
             // shareds: locate its field and rebuild the lvalue with the
             // declared alignment of the original variable.
4068         auto *OriginalVD = Pair.second.Original;
4069         auto *SharedField = CapturesInfo.lookup(OriginalVD);
4070         auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4071         SharedRefLValue = CGF.MakeAddrLValue(
4072             Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4073             SharedRefLValue.getType(),
4074             LValueBaseInfo(AlignmentSource::Decl,
4075                            SharedRefLValue.getBaseInfo().getMayAlias()));
4076         QualType Type = OriginalVD->getType();
4077         if (Type->isArrayType()) {
4078           // Initialize firstprivate array.
4079           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4080             // Perform simple memcpy.
4081             CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
4082                                     SharedRefLValue.getAddress(), Type);
4083           } else {
4084             // Initialize firstprivate array using element-by-element
4085             // initialization.
4086             CGF.EmitOMPAggregateAssign(
4087                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4088                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4089                                                   Address SrcElement) {
4090                   // Clean up any temporaries needed by the initialization.
4091                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
                       // Map the element-init variable to the current source
                       // element so that Init reads from it.
4092                   InitScope.addPrivate(
4093                       Elem, [SrcElement]() -> Address { return SrcElement; });
4094                   (void)InitScope.Privatize();
4095                   // Emit initialization for single element.
4096                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4097                       CGF, &CapturesInfo);
4098                   CGF.EmitAnyExprToMem(Init, DestElement,
4099                                        Init->getType().getQualifiers(),
4100                                        /*IsInitializer=*/false);
4101                 });
4102           }
4103         } else {
               // Non-array case: map the element-init variable to the original's
               // address in the shareds and emit the initializer once.
4104           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4105           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4106             return SharedRefLValue.getAddress();
4107           });
4108           (void)InitScope.Privatize();
4109           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4110           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4111                              /*capturedByInit=*/false);
4112         }
4113       } else
             // No element-init variable: the initializer does not depend on the
             // original value, so emit it directly.
4114         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4115     }
         // Advance to the next privates field even if nothing was emitted.
4116     ++FI;
4117   }
4118 }
4119 
4120 /// Check if duplication function is required for taskloops.
4121 static bool checkInitIsRequired(CodeGenFunction &CGF,
4122                                 ArrayRef<PrivateDataTy> Privates) {
4123   bool InitRequired = false;
4124   for (auto &&Pair : Privates) {
4125     auto *VD = Pair.second.PrivateCopy;
4126     auto *Init = VD->getAnyInitializer();
4127     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4128                                     !CGF.isTrivialInitializer(Init));
4129   }
4130   return InitRequired;
4131 }
4132 
4133 
4134 /// Emit task_dup function (for initialization of
4135 /// private/firstprivate/lastprivate vars and last_iter flag)
4136 /// \code
4137 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4138 /// lastpriv) {
4139 /// // setup lastprivate flag
4140 ///    task_dst->last = lastpriv;
4141 /// // could be constructor calls here...
4142 /// }
4143 /// \endcode
     /// \param WithLastIter If true, the generated function stores its third
     /// argument into the KmpTaskTLastIter field of the destination task.
     /// \return The generated internal-linkage function.
4144 static llvm::Value *
4145 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4146                     const OMPExecutableDirective &D,
4147                     QualType KmpTaskTWithPrivatesPtrQTy,
4148                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4149                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4150                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4151                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4152   auto &C = CGM.getContext();
       // Parameters of the generated function: destination task, source task and
       // the integer lastprivate flag.
4153   FunctionArgList Args;
4154   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4155                            KmpTaskTWithPrivatesPtrQTy,
4156                            ImplicitParamDecl::Other);
4157   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4158                            KmpTaskTWithPrivatesPtrQTy,
4159                            ImplicitParamDecl::Other);
4160   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4161                                 ImplicitParamDecl::Other);
4162   Args.push_back(&DstArg);
4163   Args.push_back(&SrcArg);
4164   Args.push_back(&LastprivArg);
       // Create the void-returning function with internal linkage and start
       // emitting its body in a fresh CodeGenFunction with debug info disabled.
4165   auto &TaskDupFnInfo =
4166       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4167   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4168   auto *TaskDup =
4169       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
4170                              ".omp_task_dup.", &CGM.getModule());
4171   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
4172   CodeGenFunction CGF(CGM);
4173   CGF.disableDebugInfo();
4174   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
4175 
       // TDBase designates the destination task (*task_dst).
4176   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4177       CGF.GetAddrOfLocalVar(&DstArg),
4178       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4179   // task_dst->liter = lastpriv;
4180   if (WithLastIter) {
4181     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
         // The kmp_task_t part is the first field of the record with privates.
4182     LValue Base = CGF.EmitLValueForField(
4183         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4184     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4185     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4186         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4187     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4188   }
4189 
4190   // Emit initial values for private copies (if any).
4191   assert(!Privates.empty());
4192   Address KmpTaskSharedsPtr = Address::invalid();
4193   if (!Data.FirstprivateVars.empty()) {
         // With firstprivates present, load the shareds pointer of the source
         // task. NOTE: this TDBase shadows the destination TDBase declared above
         // and designates *task_src.
4194     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4195         CGF.GetAddrOfLocalVar(&SrcArg),
4196         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4197     LValue Base = CGF.EmitLValueForField(
4198         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4199     KmpTaskSharedsPtr = Address(
4200         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4201                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4202                                                   KmpTaskTShareds)),
4203                              Loc),
4204         CGF.getNaturalTypeAlignment(SharedsTy));
4205   }
       // Initialize the privates of the destination task (outer TDBase) using the
       // shareds of the source task.
4206   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4207                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4208   CGF.FinishFunction();
4209   return TaskDup;
4210 }
4211 
4212 /// Checks if destructor function is required to be generated.
4213 /// \return true if cleanups are required, false otherwise.
4214 static bool
4215 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4216   bool NeedsCleanup = false;
4217   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4218   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4219   for (auto *FD : PrivateRD->fields()) {
4220     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4221     if (NeedsCleanup)
4222       break;
4223   }
4224   return NeedsCleanup;
4225 }
4226 
4227 CGOpenMPRuntime::TaskResultTy
4228 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4229                               const OMPExecutableDirective &D,
4230                               llvm::Value *TaskFunction, QualType SharedsTy,
4231                               Address Shareds, const OMPTaskDataTy &Data) {
       // Overview: collects the private/firstprivate/lastprivate entries, builds
       // the kmp_task_t-with-privates record for this task, emits the privates
       // mapping and proxy entry functions, calls __kmpc_omp_task_alloc, copies
       // the shareds, initializes the private copies, and stores the destructors
       // function and priority when needed. For taskloop directives a task-dup
       // function may be emitted as well. The returned TaskResultTy carries the
       // new task, the proxy entry, the typed task pointer, the kmp_task_t lvalue
       // and the kmp_task_t record declaration.
4232   auto &C = CGM.getContext();
4233   llvm::SmallVector<PrivateDataTy, 4> Privates;
4234   // Aggregate privates and sort them by the alignment.
       // Each entry pairs the declared alignment of the original variable with
       // (original, private copy, element-init variable); only firstprivates get
       // a non-null element-init variable.
4235   auto I = Data.PrivateCopies.begin();
4236   for (auto *E : Data.PrivateVars) {
4237     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4238     Privates.push_back(std::make_pair(
4239         C.getDeclAlign(VD),
4240         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4241                          /*PrivateElemInit=*/nullptr)));
4242     ++I;
4243   }
4244   I = Data.FirstprivateCopies.begin();
4245   auto IElemInitRef = Data.FirstprivateInits.begin();
4246   for (auto *E : Data.FirstprivateVars) {
4247     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4248     Privates.push_back(std::make_pair(
4249         C.getDeclAlign(VD),
4250         PrivateHelpersTy(
4251             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4252             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
4253     ++I;
4254     ++IElemInitRef;
4255   }
4256   I = Data.LastprivateCopies.begin();
4257   for (auto *E : Data.LastprivateVars) {
4258     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4259     Privates.push_back(std::make_pair(
4260         C.getDeclAlign(VD),
4261         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4262                          /*PrivateElemInit=*/nullptr)));
4263     ++I;
4264   }
       // The comparator returns 1 when the first entry has the smaller alignment,
       // so entries end up ordered by decreasing alignment.
4265   llvm::array_pod_sort(Privates.begin(), Privates.end(),
4266                        array_pod_sort_comparator);
4267   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4268   // Build type kmp_routine_entry_t (if not built yet).
4269   emitKmpRoutineEntryT(KmpInt32Ty);
4270   // Build type kmp_task_t (if not built yet).
4271   if (KmpTaskTQTy.isNull()) {
4272     KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4273         CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4274   }
4275   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4276   // Build particular struct kmp_task_t for the given task.
4277   auto *KmpTaskTWithPrivatesQTyRD =
4278       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4279   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4280   QualType KmpTaskTWithPrivatesPtrQTy =
4281       C.getPointerType(KmpTaskTWithPrivatesQTy);
4282   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4283   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
4284   auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4285   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4286 
4287   // Emit initial values for private copies (if any).
4288   llvm::Value *TaskPrivatesMap = nullptr;
       // The type of the privates-map argument is taken from the parameter at
       // index 3 of the outlined task function.
4289   auto *TaskPrivatesMapTy =
4290       std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4291   if (!Privates.empty()) {
4292     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4293     TaskPrivatesMap = emitTaskPrivateMappingFunction(
4294         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4295         FI->getType(), Privates);
4296     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4297         TaskPrivatesMap, TaskPrivatesMapTy);
4298   } else {
         // Without privates a null pointer of the expected type is passed.
4299     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4300         cast<llvm::PointerType>(TaskPrivatesMapTy));
4301   }
4302   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4303   // kmp_task_t *tt);
4304   auto *TaskEntry = emitProxyTaskFunction(
4305       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4306       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4307       TaskPrivatesMap);
4308 
4309   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4310   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4311   // kmp_routine_entry_t *task_entry);
4312   // Task flags. Format is taken from
4313   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4314   // description of kmp_tasking_flags struct.
4315   enum {
4316     TiedFlag = 0x1,
4317     FinalFlag = 0x2,
4318     DestructorsFlag = 0x8,
4319     PriorityFlag = 0x20
4320   };
       // Flags collects the bits known at compile time; the final bit is handled
       // separately below because it may depend on a runtime value.
4321   unsigned Flags = Data.Tied ? TiedFlag : 0;
4322   bool NeedsCleanup = false;
4323   if (!Privates.empty()) {
4324     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4325     if (NeedsCleanup)
4326       Flags = Flags | DestructorsFlag;
4327   }
4328   if (Data.Priority.getInt())
4329     Flags = Flags | PriorityFlag;
       // Data.Final: if its pointer part is set it is used as the condition of a
       // select between FinalFlag and 0; otherwise the int part decides the
       // constant.
4330   auto *TaskFlags =
4331       Data.Final.getPointer()
4332           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4333                                      CGF.Builder.getInt32(FinalFlag),
4334                                      CGF.Builder.getInt32(/*C=*/0))
4335           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4336   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4337   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4338   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4339                               getThreadID(CGF, Loc), TaskFlags,
4340                               KmpTaskTWithPrivatesTySize, SharedsSize,
4341                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4342                                   TaskEntry, KmpRoutineEntryPtrTy)};
4343   auto *NewTask = CGF.EmitRuntimeCall(
4344       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
       // View the returned task as a pointer to this task's record with privates.
4345   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4346       NewTask, KmpTaskTWithPrivatesPtrTy);
4347   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4348                                                KmpTaskTWithPrivatesQTy);
       // TDBase is the kmp_task_t part, i.e. the first field of the record.
4349   LValue TDBase =
4350       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4351   // Fill the data in the resulting kmp_task_t record.
4352   // Copy shareds if there are any.
4353   Address KmpTaskSharedsPtr = Address::invalid();
4354   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4355     KmpTaskSharedsPtr =
4356         Address(CGF.EmitLoadOfScalar(
4357                     CGF.EmitLValueForField(
4358                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4359                                            KmpTaskTShareds)),
4360                     Loc),
4361                 CGF.getNaturalTypeAlignment(SharedsTy));
4362     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
4363   }
4364   // Emit initial values for private copies (if any).
4365   TaskResultTy Result;
4366   if (!Privates.empty()) {
         // The whole-record lvalue (Base, not TDBase) is passed here because
         // emitPrivatesInit selects the privates field from it.
4367     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4368                      SharedsTy, SharedsPtrTy, Data, Privates,
4369                      /*ForDup=*/false);
         // Taskloop directives get a task-dup function when there are
         // lastprivates or when some private copy needs a non-trivial
         // constructor call.
4370     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4371         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4372       Result.TaskDupFn = emitTaskDupFunction(
4373           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4374           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4375           /*WithLastIter=*/!Data.LastprivateVars.empty());
4376     }
4377   }
4378   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4379   enum { Priority = 0, Destructors = 1 };
4380   // Provide pointer to function with destructors for privates.
       // The union declaration is taken from the Data1 field and is reused for
       // the Data2 field below (presumably both fields have the same union type).
4381   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4382   auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
4383   if (NeedsCleanup) {
4384     llvm::Value *DestructorFn = emitDestructorsFunction(
4385         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4386         KmpTaskTWithPrivatesQTy);
4387     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4388     LValue DestructorsLV = CGF.EmitLValueForField(
4389         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4390     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4391                               DestructorFn, KmpRoutineEntryPtrTy),
4392                           DestructorsLV);
4393   }
4394   // Set priority.
       // Data.Priority: the int part says whether a priority is present, the
       // pointer part is the value that gets stored.
4395   if (Data.Priority.getInt()) {
4396     LValue Data2LV = CGF.EmitLValueForField(
4397         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4398     LValue PriorityLV = CGF.EmitLValueForField(
4399         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4400     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4401   }
4402   Result.NewTask = NewTask;
4403   Result.TaskEntry = TaskEntry;
4404   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4405   Result.TDBase = TDBase;
4406   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4407   return Result;
4408 }
4409 
4410 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4411                                    const OMPExecutableDirective &D,
4412                                    llvm::Value *TaskFunction,
4413                                    QualType SharedsTy, Address Shareds,
4414                                    const Expr *IfCond,
4415                                    const OMPTaskDataTy &Data) {
       // Overview: allocates and initializes the task via emitTaskInit, builds the
       // kmp_depend_info array when there are dependences, and then emits either
       // the enqueueing call (__kmpc_omp_task or __kmpc_omp_task_with_deps) or,
       // on the false branch of an if clause, a direct call of the proxy task
       // entry.
       // Nothing is emitted when there is no insertion point.
4416   if (!CGF.HaveInsertPoint())
4417     return;
4418 
4419   TaskResultTy Result =
4420       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4421   llvm::Value *NewTask = Result.NewTask;
4422   llvm::Value *TaskEntry = Result.TaskEntry;
4423   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4424   LValue TDBase = Result.TDBase;
4425   RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4426   auto &C = CGM.getContext();
4427   // Process list of dependences.
4428   Address DependenciesArray = Address::invalid();
4429   unsigned NumDependencies = Data.Dependences.size();
4430   if (NumDependencies) {
4431     // Dependence kind for RTL.
4432     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
         // Field indices inside the kmp_depend_info record.
4433     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4434     RecordDecl *KmpDependInfoRD;
         // The flags field is an unsigned integer with the bit width of bool.
4435     QualType FlagsTy =
4436         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4437     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
         // Build the kmp_depend_info record on first use and cache its type in
         // KmpDependInfoTy; later calls reuse the cached declaration.
4438     if (KmpDependInfoTy.isNull()) {
4439       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4440       KmpDependInfoRD->startDefinition();
4441       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4442       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4443       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4444       KmpDependInfoRD->completeDefinition();
4445       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4446     } else
4447       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4448     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
4449     // Define type kmp_depend_info[<Dependences.size()>];
4450     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4451         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
4452         ArrayType::Normal, /*IndexTypeQuals=*/0);
4453     // kmp_depend_info[<Dependences.size()>] deps;
4454     DependenciesArray =
4455         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4456     for (unsigned i = 0; i < NumDependencies; ++i) {
           // Each dependence is a (kind, expression) pair.
4457       const Expr *E = Data.Dependences[i].second;
4458       auto Addr = CGF.EmitLValue(E);
4459       llvm::Value *Size;
4460       QualType Ty = E->getType();
4461       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
             // For an array section the length is computed as the difference
             // between the address one element past the lvalue emitted with
             // LowerBound=false (presumably the section's last element) and the
             // address of the section's start.
4462         LValue UpAddrLVal =
4463             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
4464         llvm::Value *UpAddr =
4465             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
4466         llvm::Value *LowIntPtr =
4467             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
4468         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
4469         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4470       } else
4471         Size = CGF.getTypeSize(Ty);
4472       auto Base = CGF.MakeAddrLValue(
4473           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
4474           KmpDependInfoTy);
4475       // deps[i].base_addr = &<Dependences[i].second>;
4476       auto BaseAddrLVal = CGF.EmitLValueForField(
4477           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4478       CGF.EmitStoreOfScalar(
4479           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
4480           BaseAddrLVal);
4481       // deps[i].len = sizeof(<Dependences[i].second>);
4482       auto LenLVal = CGF.EmitLValueForField(
4483           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4484       CGF.EmitStoreOfScalar(Size, LenLVal);
4485       // deps[i].flags = <Dependences[i].first>;
4486       RTLDependenceKindTy DepKind;
4487       switch (Data.Dependences[i].first) {
4488       case OMPC_DEPEND_in:
4489         DepKind = DepIn;
4490         break;
4491       // Out and InOut dependencies must use the same code.
4492       case OMPC_DEPEND_out:
4493       case OMPC_DEPEND_inout:
4494         DepKind = DepInOut;
4495         break;
           // The remaining kinds are not expected here and are treated as
           // unreachable.
4496       case OMPC_DEPEND_source:
4497       case OMPC_DEPEND_sink:
4498       case OMPC_DEPEND_unknown:
4499         llvm_unreachable("Unknown task dependence type");
4500       }
4501       auto FlagsLVal = CGF.EmitLValueForField(
4502           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4503       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4504                             FlagsLVal);
4505     }
         // Replace the array address by a void pointer to its first element; this
         // is the form passed to the runtime calls below.
4506     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4507         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
4508         CGF.VoidPtrTy);
4509   }
4510 
4511   // NOTE: routine and part_id fields are initialized by the
4512   // __kmpc_omp_task_alloc() libcall.
4513   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4514   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4515   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4516   // list is not empty
4517   auto *ThreadID = getThreadID(CGF, Loc);
4518   auto *UpLoc = emitUpdateLocation(CGF, Loc);
4519   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
       // DepTaskArgs is filled, and later read, only when there are dependences.
       // The no-alias dependence list is always passed as empty (0 and null).
4520   llvm::Value *DepTaskArgs[7];
4521   if (NumDependencies) {
4522     DepTaskArgs[0] = UpLoc;
4523     DepTaskArgs[1] = ThreadID;
4524     DepTaskArgs[2] = NewTask;
4525     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
4526     DepTaskArgs[4] = DependenciesArray.getPointer();
4527     DepTaskArgs[5] = CGF.Builder.getInt32(0);
4528     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4529   }
       // "Then" path: hand the task to the runtime. The argument arrays are
       // captured by reference; the lambda is only invoked further down in this
       // function.
4530   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
4531                         &TaskArgs,
4532                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
         // For untied tasks the part id field is set to 0 before the task is
         // passed to the runtime.
4533     if (!Data.Tied) {
4534       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4535       auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4536       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4537     }
4538     if (NumDependencies) {
4539       CGF.EmitRuntimeCall(
4540           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
4541     } else {
4542       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4543                           TaskArgs);
4544     }
4545     // Check if parent region is untied and build return for untied task;
4546     if (auto *Region =
4547             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4548       Region->emitUntiedSwitch(CGF);
4549   };
4550 
       // Arguments for __kmpc_omp_wait_deps; like DepTaskArgs they are only
       // filled and read when there are dependences.
4551   llvm::Value *DepWaitTaskArgs[6];
4552   if (NumDependencies) {
4553     DepWaitTaskArgs[0] = UpLoc;
4554     DepWaitTaskArgs[1] = ThreadID;
4555     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4556     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4557     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4558     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4559   }
       // "Else" path (if clause evaluates to false): wait for the dependences and
       // call the proxy task entry directly instead of enqueueing the task.
4560   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4561                         NumDependencies, &DepWaitTaskArgs,
4562                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4563     auto &RT = CGF.CGM.getOpenMPRuntime();
4564     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4565     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4566     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4567     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4568     // is specified.
4569     if (NumDependencies)
4570       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4571                           DepWaitTaskArgs);
4572     // Call proxy_task_entry(gtid, new_task);
4573     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4574                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4575       Action.Enter(CGF);
4576       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4577       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4578                                                           OutlinedFnArgs);
4579     };
4580 
4581     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4582     // kmp_task_t *new_task);
4583     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4584     // kmp_task_t *new_task);
         // NOTE(review): CommonActionTy presumably emits the first call when the
         // region is entered and the second one when it is left - confirm
         // against its definition.
4585     RegionCodeGenTy RCG(CodeGen);
4586     CommonActionTy Action(
4587         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4588         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4589     RCG.setAction(Action);
4590     RCG(CGF);
4591   };
4592 
       // With an if clause both paths are emitted under the condition; without
       // one only the "then" path is emitted.
4593   if (IfCond)
4594     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4595   else {
4596     RegionCodeGenTy ThenRCG(ThenCodeGen);
4597     ThenRCG(CGF);
4598   }
4599 }
4600 
4601 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4602                                        const OMPLoopDirective &D,
4603                                        llvm::Value *TaskFunction,
4604                                        QualType SharedsTy, Address Shareds,
4605                                        const Expr *IfCond,
4606                                        const OMPTaskDataTy &Data) {
       // Overview: allocates and initializes the task via emitTaskInit, stores the
       // loop bounds, stride and reductions pointer into the kmp_task_t record,
       // and emits the call to __kmpc_taskloop.
       // Nothing is emitted when there is no insertion point.
4607   if (!CGF.HaveInsertPoint())
4608     return;
4609   TaskResultTy Result =
4610       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4611   // NOTE: routine and part_id fields are initialized by the
4612   // __kmpc_omp_task_alloc() libcall.
4613   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4614   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4615   // sched, kmp_uint64 grainsize, void *task_dup);
4616   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4617   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
       // if_val: the if-clause condition converted to int, or the constant 1 when
       // there is no if clause.
       // NOTE(review): the cast uses isSigned=true, so a true boolean presumably
       // becomes -1 rather than 1 - confirm the runtime only tests for non-zero.
4618   llvm::Value *IfVal;
4619   if (IfCond) {
4620     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4621                                       /*isSigned=*/true);
4622   } else
4623     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4624 
       // Initialize the lower bound, upper bound and stride fields of the task
       // from the initializers of the directive's corresponding helper variables.
4625   LValue LBLVal = CGF.EmitLValueForField(
4626       Result.TDBase,
4627       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4628   auto *LBVar =
4629       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4630   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4631                        /*IsInitializer=*/true);
4632   LValue UBLVal = CGF.EmitLValueForField(
4633       Result.TDBase,
4634       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4635   auto *UBVar =
4636       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4637   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4638                        /*IsInitializer=*/true);
4639   LValue StLVal = CGF.EmitLValueForField(
4640       Result.TDBase,
4641       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4642   auto *StVar =
4643       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4644   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4645                        /*IsInitializer=*/true);
4646   // Store reductions address.
       // When no reductions value is provided the field is null-initialized.
4647   LValue RedLVal = CGF.EmitLValueForField(
4648       Result.TDBase,
4649       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4650   if (Data.Reductions)
4651     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4652   else {
4653     CGF.EmitNullInitialization(RedLVal.getAddress(),
4654                                CGF.getContext().VoidPtrTy);
4655   }
       // Values of the 'sched' argument. Data.Schedule: the pointer part is the
       // schedule value (absent means no schedule), the int part selects NumTasks
       // over Grainsize.
4656   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
       // Arguments in the order of the __kmpc_taskloop signature quoted above:
       // lb and ub are passed by address, the stride by value.
4657   llvm::Value *TaskArgs[] = {
4658       UpLoc,
4659       ThreadID,
4660       Result.NewTask,
4661       IfVal,
4662       LBLVal.getPointer(),
4663       UBLVal.getPointer(),
4664       CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4665       llvm::ConstantInt::getNullValue(
4666           CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
4667       llvm::ConstantInt::getSigned(
4668           CGF.IntTy, Data.Schedule.getPointer()
4669                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4670                          : NoSchedule),
4671       Data.Schedule.getPointer()
4672           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4673                                       /*isSigned=*/false)
4674           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4675       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4676                              Result.TaskDupFn, CGF.VoidPtrTy)
4677                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4678   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4679 }
4680 
4681 /// \brief Emit reduction operation for each element of array (required for
4682 /// array sections) LHS op = RHS.
4683 /// \param Type Type of array.
4684 /// \param LHSVar Variable on the left side of the reduction operation
4685 /// (references element of array in original variable).
4686 /// \param RHSVar Variable on the right side of the reduction operation
4687 /// (references element of array in original variable).
4688 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4689 /// RHSVar.
4690 static void EmitOMPAggregateReduction(
4691     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4692     const VarDecl *RHSVar,
4693     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4694                                   const Expr *, const Expr *)> &RedOpGen,
4695     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4696     const Expr *UpExpr = nullptr) {
4697   // Perform element-by-element initialization.
4698   QualType ElementTy;
4699   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4700   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4701 
4702   // Drill down to the base element type on both arrays.
4703   auto ArrayTy = Type->getAsArrayTypeUnsafe();
4704   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4705 
4706   auto RHSBegin = RHSAddr.getPointer();
4707   auto LHSBegin = LHSAddr.getPointer();
4708   // Cast from pointer to array type to pointer to single element.
4709   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4710   // The basic structure here is a while-do loop.
4711   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4712   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4713   auto IsEmpty =
4714       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4715   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4716 
4717   // Enter the loop body, making that address the current address.
4718   auto EntryBB = CGF.Builder.GetInsertBlock();
4719   CGF.EmitBlock(BodyBB);
4720 
4721   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4722 
4723   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4724       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4725   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4726   Address RHSElementCurrent =
4727       Address(RHSElementPHI,
4728               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4729 
4730   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4731       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4732   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4733   Address LHSElementCurrent =
4734       Address(LHSElementPHI,
4735               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4736 
4737   // Emit copy.
4738   CodeGenFunction::OMPPrivateScope Scope(CGF);
4739   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4740   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4741   Scope.Privatize();
4742   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4743   Scope.ForceCleanup();
4744 
4745   // Shift the address forward by one element.
4746   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4747       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4748   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4749       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4750   // Check whether we've reached the end.
4751   auto Done =
4752       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4753   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4754   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4755   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4756 
4757   // Done.
4758   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4759 }
4760 
4761 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4762 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4763 /// UDR combiner function.
4764 static void emitReductionCombiner(CodeGenFunction &CGF,
4765                                   const Expr *ReductionOp) {
4766   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4767     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4768       if (auto *DRE =
4769               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4770         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4771           std::pair<llvm::Function *, llvm::Function *> Reduction =
4772               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4773           RValue Func = RValue::get(Reduction.first);
4774           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4775           CGF.EmitIgnoredExpr(ReductionOp);
4776           return;
4777         }
4778   CGF.EmitIgnoredExpr(ReductionOp);
4779 }
4780 
4781 llvm::Value *CGOpenMPRuntime::emitReductionFunction(
4782     CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
4783     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
4784     ArrayRef<const Expr *> ReductionOps) {
4785   auto &C = CGM.getContext();
4786 
4787   // void reduction_func(void *LHSArg, void *RHSArg);
4788   FunctionArgList Args;
4789   ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4790   ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4791   Args.push_back(&LHSArg);
4792   Args.push_back(&RHSArg);
4793   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4794   auto *Fn = llvm::Function::Create(
4795       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4796       ".omp.reduction.reduction_func", &CGM.getModule());
4797   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4798   CodeGenFunction CGF(CGM);
4799   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4800 
4801   // Dst = (void*[n])(LHSArg);
4802   // Src = (void*[n])(RHSArg);
4803   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4804       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4805       ArgsType), CGF.getPointerAlign());
4806   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4807       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4808       ArgsType), CGF.getPointerAlign());
4809 
4810   //  ...
4811   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4812   //  ...
4813   CodeGenFunction::OMPPrivateScope Scope(CGF);
4814   auto IPriv = Privates.begin();
4815   unsigned Idx = 0;
4816   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4817     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4818     Scope.addPrivate(RHSVar, [&]() -> Address {
4819       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4820     });
4821     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4822     Scope.addPrivate(LHSVar, [&]() -> Address {
4823       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4824     });
4825     QualType PrivTy = (*IPriv)->getType();
4826     if (PrivTy->isVariablyModifiedType()) {
4827       // Get array size and emit VLA type.
4828       ++Idx;
4829       Address Elem =
4830           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4831       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4832       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4833       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4834       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4835           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4836       CGF.EmitVariablyModifiedType(PrivTy);
4837     }
4838   }
4839   Scope.Privatize();
4840   IPriv = Privates.begin();
4841   auto ILHS = LHSExprs.begin();
4842   auto IRHS = RHSExprs.begin();
4843   for (auto *E : ReductionOps) {
4844     if ((*IPriv)->getType()->isArrayType()) {
4845       // Emit reduction for array section.
4846       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4847       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4848       EmitOMPAggregateReduction(
4849           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4850           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4851             emitReductionCombiner(CGF, E);
4852           });
4853     } else
4854       // Emit reduction for array subscript or single variable.
4855       emitReductionCombiner(CGF, E);
4856     ++IPriv;
4857     ++ILHS;
4858     ++IRHS;
4859   }
4860   Scope.ForceCleanup();
4861   CGF.FinishFunction();
4862   return Fn;
4863 }
4864 
4865 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4866                                                   const Expr *ReductionOp,
4867                                                   const Expr *PrivateRef,
4868                                                   const DeclRefExpr *LHS,
4869                                                   const DeclRefExpr *RHS) {
4870   if (PrivateRef->getType()->isArrayType()) {
4871     // Emit reduction for array section.
4872     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4873     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4874     EmitOMPAggregateReduction(
4875         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4876         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4877           emitReductionCombiner(CGF, ReductionOp);
4878         });
4879   } else
4880     // Emit reduction for array subscript or single variable.
4881     emitReductionCombiner(CGF, ReductionOp);
4882 }
4883 
4884 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4885                                     ArrayRef<const Expr *> Privates,
4886                                     ArrayRef<const Expr *> LHSExprs,
4887                                     ArrayRef<const Expr *> RHSExprs,
4888                                     ArrayRef<const Expr *> ReductionOps,
4889                                     ReductionOptionsTy Options) {
4890   if (!CGF.HaveInsertPoint())
4891     return;
4892 
4893   bool WithNowait = Options.WithNowait;
4894   bool SimpleReduction = Options.SimpleReduction;
4895 
4896   // Next code should be emitted for reduction:
4897   //
4898   // static kmp_critical_name lock = { 0 };
4899   //
4900   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4901   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4902   //  ...
4903   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4904   //  *(Type<n>-1*)rhs[<n>-1]);
4905   // }
4906   //
4907   // ...
4908   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4909   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4910   // RedList, reduce_func, &<lock>)) {
4911   // case 1:
4912   //  ...
4913   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4914   //  ...
4915   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4916   // break;
4917   // case 2:
4918   //  ...
4919   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4920   //  ...
4921   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4922   // break;
4923   // default:;
4924   // }
4925   //
4926   // if SimpleReduction is true, only the next code is generated:
4927   //  ...
4928   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4929   //  ...
4930 
4931   auto &C = CGM.getContext();
4932 
4933   if (SimpleReduction) {
4934     CodeGenFunction::RunCleanupsScope Scope(CGF);
4935     auto IPriv = Privates.begin();
4936     auto ILHS = LHSExprs.begin();
4937     auto IRHS = RHSExprs.begin();
4938     for (auto *E : ReductionOps) {
4939       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4940                                   cast<DeclRefExpr>(*IRHS));
4941       ++IPriv;
4942       ++ILHS;
4943       ++IRHS;
4944     }
4945     return;
4946   }
4947 
4948   // 1. Build a list of reduction variables.
4949   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4950   auto Size = RHSExprs.size();
4951   for (auto *E : Privates) {
4952     if (E->getType()->isVariablyModifiedType())
4953       // Reserve place for array size.
4954       ++Size;
4955   }
4956   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4957   QualType ReductionArrayTy =
4958       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
4959                              /*IndexTypeQuals=*/0);
4960   Address ReductionList =
4961       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4962   auto IPriv = Privates.begin();
4963   unsigned Idx = 0;
4964   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4965     Address Elem =
4966       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
4967     CGF.Builder.CreateStore(
4968         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4969             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
4970         Elem);
4971     if ((*IPriv)->getType()->isVariablyModifiedType()) {
4972       // Store array size.
4973       ++Idx;
4974       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
4975                                              CGF.getPointerSize());
4976       llvm::Value *Size = CGF.Builder.CreateIntCast(
4977           CGF.getVLASize(
4978                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
4979               .first,
4980           CGF.SizeTy, /*isSigned=*/false);
4981       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
4982                               Elem);
4983     }
4984   }
4985 
4986   // 2. Emit reduce_func().
4987   auto *ReductionFn = emitReductionFunction(
4988       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
4989       LHSExprs, RHSExprs, ReductionOps);
4990 
4991   // 3. Create static kmp_critical_name lock = { 0 };
4992   auto *Lock = getCriticalRegionLock(".reduction");
4993 
4994   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4995   // RedList, reduce_func, &<lock>);
4996   auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4997   auto *ThreadId = getThreadID(CGF, Loc);
4998   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
4999   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5000       ReductionList.getPointer(), CGF.VoidPtrTy);
5001   llvm::Value *Args[] = {
5002       IdentTLoc,                             // ident_t *<loc>
5003       ThreadId,                              // i32 <gtid>
5004       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5005       ReductionArrayTySize,                  // size_type sizeof(RedList)
5006       RL,                                    // void *RedList
5007       ReductionFn, // void (*) (void *, void *) <reduce_func>
5008       Lock         // kmp_critical_name *&<lock>
5009   };
5010   auto Res = CGF.EmitRuntimeCall(
5011       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5012                                        : OMPRTL__kmpc_reduce),
5013       Args);
5014 
5015   // 5. Build switch(res)
5016   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5017   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5018 
5019   // 6. Build case 1:
5020   //  ...
5021   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5022   //  ...
5023   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5024   // break;
5025   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5026   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5027   CGF.EmitBlock(Case1BB);
5028 
5029   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5030   llvm::Value *EndArgs[] = {
5031       IdentTLoc, // ident_t *<loc>
5032       ThreadId,  // i32 <gtid>
5033       Lock       // kmp_critical_name *&<lock>
5034   };
5035   auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5036       CodeGenFunction &CGF, PrePostActionTy &Action) {
5037     auto &RT = CGF.CGM.getOpenMPRuntime();
5038     auto IPriv = Privates.begin();
5039     auto ILHS = LHSExprs.begin();
5040     auto IRHS = RHSExprs.begin();
5041     for (auto *E : ReductionOps) {
5042       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5043                                      cast<DeclRefExpr>(*IRHS));
5044       ++IPriv;
5045       ++ILHS;
5046       ++IRHS;
5047     }
5048   };
5049   RegionCodeGenTy RCG(CodeGen);
5050   CommonActionTy Action(
5051       nullptr, llvm::None,
5052       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5053                                        : OMPRTL__kmpc_end_reduce),
5054       EndArgs);
5055   RCG.setAction(Action);
5056   RCG(CGF);
5057 
5058   CGF.EmitBranch(DefaultBB);
5059 
5060   // 7. Build case 2:
5061   //  ...
5062   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5063   //  ...
5064   // break;
5065   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5066   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5067   CGF.EmitBlock(Case2BB);
5068 
5069   auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5070       CodeGenFunction &CGF, PrePostActionTy &Action) {
5071     auto ILHS = LHSExprs.begin();
5072     auto IRHS = RHSExprs.begin();
5073     auto IPriv = Privates.begin();
5074     for (auto *E : ReductionOps) {
5075       const Expr *XExpr = nullptr;
5076       const Expr *EExpr = nullptr;
5077       const Expr *UpExpr = nullptr;
5078       BinaryOperatorKind BO = BO_Comma;
5079       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
5080         if (BO->getOpcode() == BO_Assign) {
5081           XExpr = BO->getLHS();
5082           UpExpr = BO->getRHS();
5083         }
5084       }
5085       // Try to emit update expression as a simple atomic.
5086       auto *RHSExpr = UpExpr;
5087       if (RHSExpr) {
5088         // Analyze RHS part of the whole expression.
5089         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
5090                 RHSExpr->IgnoreParenImpCasts())) {
5091           // If this is a conditional operator, analyze its condition for
5092           // min/max reduction operator.
5093           RHSExpr = ACO->getCond();
5094         }
5095         if (auto *BORHS =
5096                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5097           EExpr = BORHS->getRHS();
5098           BO = BORHS->getOpcode();
5099         }
5100       }
5101       if (XExpr) {
5102         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5103         auto &&AtomicRedGen = [BO, VD,
5104                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5105                                     const Expr *EExpr, const Expr *UpExpr) {
5106           LValue X = CGF.EmitLValue(XExpr);
5107           RValue E;
5108           if (EExpr)
5109             E = CGF.EmitAnyExpr(EExpr);
5110           CGF.EmitOMPAtomicSimpleUpdateExpr(
5111               X, E, BO, /*IsXLHSInRHSPart=*/true,
5112               llvm::AtomicOrdering::Monotonic, Loc,
5113               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5114                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5115                 PrivateScope.addPrivate(
5116                     VD, [&CGF, VD, XRValue, Loc]() -> Address {
5117                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5118                       CGF.emitOMPSimpleStore(
5119                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5120                           VD->getType().getNonReferenceType(), Loc);
5121                       return LHSTemp;
5122                     });
5123                 (void)PrivateScope.Privatize();
5124                 return CGF.EmitAnyExpr(UpExpr);
5125               });
5126         };
5127         if ((*IPriv)->getType()->isArrayType()) {
5128           // Emit atomic reduction for array section.
5129           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5130           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5131                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5132         } else
5133           // Emit atomic reduction for array subscript or single variable.
5134           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5135       } else {
5136         // Emit as a critical region.
5137         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5138                                      const Expr *, const Expr *) {
5139           auto &RT = CGF.CGM.getOpenMPRuntime();
5140           RT.emitCriticalRegion(
5141               CGF, ".atomic_reduction",
5142               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5143                 Action.Enter(CGF);
5144                 emitReductionCombiner(CGF, E);
5145               },
5146               Loc);
5147         };
5148         if ((*IPriv)->getType()->isArrayType()) {
5149           auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5150           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5151           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5152                                     CritRedGen);
5153         } else
5154           CritRedGen(CGF, nullptr, nullptr, nullptr);
5155       }
5156       ++ILHS;
5157       ++IRHS;
5158       ++IPriv;
5159     }
5160   };
5161   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5162   if (!WithNowait) {
5163     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5164     llvm::Value *EndArgs[] = {
5165         IdentTLoc, // ident_t *<loc>
5166         ThreadId,  // i32 <gtid>
5167         Lock       // kmp_critical_name *&<lock>
5168     };
5169     CommonActionTy Action(nullptr, llvm::None,
5170                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5171                           EndArgs);
5172     AtomicRCG.setAction(Action);
5173     AtomicRCG(CGF);
5174   } else
5175     AtomicRCG(CGF);
5176 
5177   CGF.EmitBranch(DefaultBB);
5178   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5179 }
5180 
5181 /// Generates unique name for artificial threadprivate variables.
5182 /// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
5183 static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
5184                                       unsigned N) {
5185   SmallString<256> Buffer;
5186   llvm::raw_svector_ostream Out(Buffer);
5187   Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
5188   return Out.str();
5189 }
5190 
5191 /// Emits reduction initializer function:
5192 /// \code
5193 /// void @.red_init(void* %arg) {
5194 /// %0 = bitcast void* %arg to <type>*
5195 /// store <type> <init>, <type>* %0
5196 /// ret void
5197 /// }
5198 /// \endcode
5199 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5200                                            SourceLocation Loc,
5201                                            ReductionCodeGen &RCG, unsigned N) {
5202   auto &C = CGM.getContext();
5203   FunctionArgList Args;
5204   ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5205   Args.emplace_back(&Param);
5206   auto &FnInfo =
5207       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5208   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5209   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5210                                     ".red_init.", &CGM.getModule());
5211   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5212   CodeGenFunction CGF(CGM);
5213   CGF.disableDebugInfo();
5214   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5215   Address PrivateAddr = CGF.EmitLoadOfPointer(
5216       CGF.GetAddrOfLocalVar(&Param),
5217       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5218   llvm::Value *Size = nullptr;
5219   // If the size of the reduction item is non-constant, load it from global
5220   // threadprivate variable.
5221   if (RCG.getSizes(N).second) {
5222     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5223         CGF, CGM.getContext().getSizeType(),
5224         generateUniqueName("reduction_size", Loc, N));
5225     Size =
5226         CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5227                              CGM.getContext().getSizeType(), SourceLocation());
5228   }
5229   RCG.emitAggregateType(CGF, N, Size);
5230   LValue SharedLVal;
5231   // If initializer uses initializer from declare reduction construct, emit a
5232   // pointer to the address of the original reduction item (reuired by reduction
5233   // initializer)
5234   if (RCG.usesReductionInitializer(N)) {
5235     Address SharedAddr =
5236         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5237             CGF, CGM.getContext().VoidPtrTy,
5238             generateUniqueName("reduction", Loc, N));
5239     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5240   } else {
5241     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
5242         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5243         CGM.getContext().VoidPtrTy);
5244   }
5245   // Emit the initializer:
5246   // %0 = bitcast void* %arg to <type>*
5247   // store <type> <init>, <type>* %0
5248   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
5249                          [](CodeGenFunction &) { return false; });
5250   CGF.FinishFunction();
5251   return Fn;
5252 }
5253 
5254 /// Emits reduction combiner function:
5255 /// \code
5256 /// void @.red_comb(void* %arg0, void* %arg1) {
5257 /// %lhs = bitcast void* %arg0 to <type>*
5258 /// %rhs = bitcast void* %arg1 to <type>*
5259 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5260 /// store <type> %2, <type>* %lhs
5261 /// ret void
5262 /// }
5263 /// \endcode
5264 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5265                                            SourceLocation Loc,
5266                                            ReductionCodeGen &RCG, unsigned N,
5267                                            const Expr *ReductionOp,
5268                                            const Expr *LHS, const Expr *RHS,
5269                                            const Expr *PrivateRef) {
5270   auto &C = CGM.getContext();
5271   auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5272   auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5273   FunctionArgList Args;
5274   ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5275   ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5276   Args.emplace_back(&ParamInOut);
5277   Args.emplace_back(&ParamIn);
5278   auto &FnInfo =
5279       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5280   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5281   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5282                                     ".red_comb.", &CGM.getModule());
5283   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5284   CodeGenFunction CGF(CGM);
5285   CGF.disableDebugInfo();
5286   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5287   llvm::Value *Size = nullptr;
5288   // If the size of the reduction item is non-constant, load it from global
5289   // threadprivate variable.
5290   if (RCG.getSizes(N).second) {
5291     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5292         CGF, CGM.getContext().getSizeType(),
5293         generateUniqueName("reduction_size", Loc, N));
5294     Size =
5295         CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5296                              CGM.getContext().getSizeType(), SourceLocation());
5297   }
5298   RCG.emitAggregateType(CGF, N, Size);
5299   // Remap lhs and rhs variables to the addresses of the function arguments.
5300   // %lhs = bitcast void* %arg0 to <type>*
5301   // %rhs = bitcast void* %arg1 to <type>*
5302   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5303   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
5304     // Pull out the pointer to the variable.
5305     Address PtrAddr = CGF.EmitLoadOfPointer(
5306         CGF.GetAddrOfLocalVar(&ParamInOut),
5307         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5308     return CGF.Builder.CreateElementBitCast(
5309         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5310   });
5311   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
5312     // Pull out the pointer to the variable.
5313     Address PtrAddr = CGF.EmitLoadOfPointer(
5314         CGF.GetAddrOfLocalVar(&ParamIn),
5315         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5316     return CGF.Builder.CreateElementBitCast(
5317         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5318   });
5319   PrivateScope.Privatize();
5320   // Emit the combiner body:
5321   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5322   // store <type> %2, <type>* %lhs
5323   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5324       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5325       cast<DeclRefExpr>(RHS));
5326   CGF.FinishFunction();
5327   return Fn;
5328 }
5329 
5330 /// Emits reduction finalizer function:
5331 /// \code
5332 /// void @.red_fini(void* %arg) {
5333 /// %0 = bitcast void* %arg to <type>*
5334 /// <destroy>(<type>* %0)
5335 /// ret void
5336 /// }
5337 /// \endcode
5338 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5339                                            SourceLocation Loc,
5340                                            ReductionCodeGen &RCG, unsigned N) {
5341   if (!RCG.needCleanups(N))
5342     return nullptr;
5343   auto &C = CGM.getContext();
5344   FunctionArgList Args;
5345   ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5346   Args.emplace_back(&Param);
5347   auto &FnInfo =
5348       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5349   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5350   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5351                                     ".red_fini.", &CGM.getModule());
5352   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5353   CodeGenFunction CGF(CGM);
5354   CGF.disableDebugInfo();
5355   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5356   Address PrivateAddr = CGF.EmitLoadOfPointer(
5357       CGF.GetAddrOfLocalVar(&Param),
5358       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5359   llvm::Value *Size = nullptr;
5360   // If the size of the reduction item is non-constant, load it from global
5361   // threadprivate variable.
5362   if (RCG.getSizes(N).second) {
5363     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5364         CGF, CGM.getContext().getSizeType(),
5365         generateUniqueName("reduction_size", Loc, N));
5366     Size =
5367         CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5368                              CGM.getContext().getSizeType(), SourceLocation());
5369   }
5370   RCG.emitAggregateType(CGF, N, Size);
5371   // Emit the finalizer body:
5372   // <destroy>(<type>* %0)
5373   RCG.emitCleanups(CGF, N, PrivateAddr);
5374   CGF.FinishFunction();
5375   return Fn;
5376 }
5377 
5378 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5379     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5380     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5381   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5382     return nullptr;
5383 
5384   // Build typedef struct:
5385   // kmp_task_red_input {
5386   //   void *reduce_shar; // shared reduction item
5387   //   size_t reduce_size; // size of data item
5388   //   void *reduce_init; // data initialization routine
5389   //   void *reduce_fini; // data finalization routine
5390   //   void *reduce_comb; // data combiner routine
5391   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5392   // } kmp_task_red_input_t;
5393   ASTContext &C = CGM.getContext();
5394   auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
5395   RD->startDefinition();
5396   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5397   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5398   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5399   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5400   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5401   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5402       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5403   RD->completeDefinition();
5404   QualType RDType = C.getRecordType(RD);
5405   unsigned Size = Data.ReductionVars.size();
5406   llvm::APInt ArraySize(/*numBits=*/64, Size);
5407   QualType ArrayRDType = C.getConstantArrayType(
5408       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
5409   // kmp_task_red_input_t .rd_input.[Size];
5410   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5411   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
5412                        Data.ReductionOps);
5413   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5414     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5415     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5416                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5417     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5418         TaskRedInput.getPointer(), Idxs,
5419         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5420         ".rd_input.gep.");
5421     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5422     // ElemLVal.reduce_shar = &Shareds[Cnt];
5423     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5424     RCG.emitSharedLValue(CGF, Cnt);
5425     llvm::Value *CastedShared =
5426         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
5427     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5428     RCG.emitAggregateType(CGF, Cnt);
5429     llvm::Value *SizeValInChars;
5430     llvm::Value *SizeVal;
5431     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5432     // We use delayed creation/initialization for VLAs, array sections and
5433     // custom reduction initializations. It is required because runtime does not
5434     // provide the way to pass the sizes of VLAs/array sections to
5435     // initializer/combiner/finalizer functions and does not pass the pointer to
5436     // original reduction item to the initializer. Instead threadprivate global
5437     // variables are used to store these values and use them in the functions.
5438     bool DelayedCreation = !!SizeVal;
5439     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5440                                                /*isSigned=*/false);
5441     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5442     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5443     // ElemLVal.reduce_init = init;
5444     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5445     llvm::Value *InitAddr =
5446         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5447     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5448     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
5449     // ElemLVal.reduce_fini = fini;
5450     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5451     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5452     llvm::Value *FiniAddr = Fini
5453                                 ? CGF.EmitCastToVoidPtr(Fini)
5454                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5455     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5456     // ElemLVal.reduce_comb = comb;
5457     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5458     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5459         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5460         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5461     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5462     // ElemLVal.flags = 0;
5463     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5464     if (DelayedCreation) {
5465       CGF.EmitStoreOfScalar(
5466           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
5467           FlagsLVal);
5468     } else
5469       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5470   }
5471   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
5472   // *data);
5473   llvm::Value *Args[] = {
5474       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5475                                 /*isSigned=*/true),
5476       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5477       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5478                                                       CGM.VoidPtrTy)};
5479   return CGF.EmitRuntimeCall(
5480       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
5481 }
5482 
5483 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5484                                               SourceLocation Loc,
5485                                               ReductionCodeGen &RCG,
5486                                               unsigned N) {
5487   auto Sizes = RCG.getSizes(N);
5488   // Emit threadprivate global variable if the type is non-constant
5489   // (Sizes.second = nullptr).
5490   if (Sizes.second) {
5491     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5492                                                      /*isSigned=*/false);
5493     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5494         CGF, CGM.getContext().getSizeType(),
5495         generateUniqueName("reduction_size", Loc, N));
5496     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5497   }
5498   // Store address of the original reduction item if custom initializer is used.
5499   if (RCG.usesReductionInitializer(N)) {
5500     Address SharedAddr = getAddrOfArtificialThreadPrivate(
5501         CGF, CGM.getContext().VoidPtrTy,
5502         generateUniqueName("reduction", Loc, N));
5503     CGF.Builder.CreateStore(
5504         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5505             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
5506         SharedAddr, /*IsVolatile=*/false);
5507   }
5508 }
5509 
5510 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5511                                               SourceLocation Loc,
5512                                               llvm::Value *ReductionsPtr,
5513                                               LValue SharedLVal) {
5514   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5515   // *d);
5516   llvm::Value *Args[] = {
5517       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5518                                 /*isSigned=*/true),
5519       ReductionsPtr,
5520       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
5521                                                       CGM.VoidPtrTy)};
5522   return Address(
5523       CGF.EmitRuntimeCall(
5524           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
5525       SharedLVal.getAlignment());
5526 }
5527 
5528 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
5529                                        SourceLocation Loc) {
5530   if (!CGF.HaveInsertPoint())
5531     return;
5532   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5533   // global_tid);
5534   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
5535   // Ignore return result until untied tasks are supported.
5536   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
5537   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5538     Region->emitUntiedSwitch(CGF);
5539 }
5540 
5541 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5542                                            OpenMPDirectiveKind InnerKind,
5543                                            const RegionCodeGenTy &CodeGen,
5544                                            bool HasCancel) {
5545   if (!CGF.HaveInsertPoint())
5546     return;
5547   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
5548   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5549 }
5550 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// Initial value used before a directive kind has been mapped.
  CancelNoreq = 0,
  /// Cancel region is 'parallel'.
  CancelParallel = 1,
  /// Cancel region is 'for'.
  CancelLoop = 2,
  /// Cancel region is 'sections'.
  CancelSections = 3,
  /// Cancel region is 'taskgroup'.
  CancelTaskgroup = 4
};
} // anonymous namespace
5560 
5561 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5562   RTCancelKind CancelKind = CancelNoreq;
5563   if (CancelRegion == OMPD_parallel)
5564     CancelKind = CancelParallel;
5565   else if (CancelRegion == OMPD_for)
5566     CancelKind = CancelLoop;
5567   else if (CancelRegion == OMPD_sections)
5568     CancelKind = CancelSections;
5569   else {
5570     assert(CancelRegion == OMPD_taskgroup);
5571     CancelKind = CancelTaskgroup;
5572   }
5573   return CancelKind;
5574 }
5575 
5576 void CGOpenMPRuntime::emitCancellationPointCall(
5577     CodeGenFunction &CGF, SourceLocation Loc,
5578     OpenMPDirectiveKind CancelRegion) {
5579   if (!CGF.HaveInsertPoint())
5580     return;
5581   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5582   // global_tid, kmp_int32 cncl_kind);
5583   if (auto *OMPRegionInfo =
5584           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5585     // For 'cancellation point taskgroup', the task region info may not have a
5586     // cancel. This may instead happen in another adjacent task.
5587     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5588       llvm::Value *Args[] = {
5589           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5590           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5591       // Ignore return result until untied tasks are supported.
5592       auto *Result = CGF.EmitRuntimeCall(
5593           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
5594       // if (__kmpc_cancellationpoint()) {
5595       //   exit from construct;
5596       // }
5597       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
5598       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
5599       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
5600       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5601       CGF.EmitBlock(ExitBB);
5602       // exit from construct;
5603       auto CancelDest =
5604           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5605       CGF.EmitBranchThroughCleanup(CancelDest);
5606       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5607     }
5608   }
5609 }
5610 
5611 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5612                                      const Expr *IfCond,
5613                                      OpenMPDirectiveKind CancelRegion) {
5614   if (!CGF.HaveInsertPoint())
5615     return;
5616   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5617   // kmp_int32 cncl_kind);
5618   if (auto *OMPRegionInfo =
5619           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5620     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
5621                                                         PrePostActionTy &) {
5622       auto &RT = CGF.CGM.getOpenMPRuntime();
5623       llvm::Value *Args[] = {
5624           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5625           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5626       // Ignore return result until untied tasks are supported.
5627       auto *Result = CGF.EmitRuntimeCall(
5628           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
5629       // if (__kmpc_cancel()) {
5630       //   exit from construct;
5631       // }
5632       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
5633       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
5634       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
5635       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5636       CGF.EmitBlock(ExitBB);
5637       // exit from construct;
5638       auto CancelDest =
5639           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5640       CGF.EmitBranchThroughCleanup(CancelDest);
5641       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5642     };
5643     if (IfCond)
5644       emitOMPIfClause(CGF, IfCond, ThenGen,
5645                       [](CodeGenFunction &, PrePostActionTy &) {});
5646     else {
5647       RegionCodeGenTy ThenRCG(ThenGen);
5648       ThenRCG(CGF);
5649     }
5650   }
5651 }
5652 
5653 /// \brief Obtain information that uniquely identifies a target entry. This
5654 /// consists of the file and device IDs as well as line number associated with
5655 /// the relevant entry source location.
5656 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
5657                                      unsigned &DeviceID, unsigned &FileID,
5658                                      unsigned &LineNum) {
5659 
5660   auto &SM = C.getSourceManager();
5661 
5662   // The loc should be always valid and have a file ID (the user cannot use
5663   // #pragma directives in macros)
5664 
5665   assert(Loc.isValid() && "Source location is expected to be always valid.");
5666   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
5667 
5668   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
5669   assert(PLoc.isValid() && "Source location is expected to be always valid.");
5670 
5671   llvm::sys::fs::UniqueID ID;
5672   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
5673     llvm_unreachable("Source file with target region no longer exists!");
5674 
5675   DeviceID = ID.getDevice();
5676   FileID = ID.getFile();
5677   LineNum = PLoc.getLine();
5678 }
5679 
5680 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5681     const OMPExecutableDirective &D, StringRef ParentName,
5682     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5683     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5684   assert(!ParentName.empty() && "Invalid target region parent name!");
5685 
5686   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5687                                    IsOffloadEntry, CodeGen);
5688 }
5689 
5690 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5691     const OMPExecutableDirective &D, StringRef ParentName,
5692     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5693     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5694   // Create a unique name for the entry function using the source location
5695   // information of the current target region. The name will be something like:
5696   //
5697   // __omp_offloading_DD_FFFF_PP_lBB
5698   //
5699   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
5700   // mangled name of the function that encloses the target region and BB is the
5701   // line number of the target region.
5702 
5703   unsigned DeviceID;
5704   unsigned FileID;
5705   unsigned Line;
5706   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
5707                            Line);
5708   SmallString<64> EntryFnName;
5709   {
5710     llvm::raw_svector_ostream OS(EntryFnName);
5711     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
5712        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
5713   }
5714 
5715   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5716 
5717   CodeGenFunction CGF(CGM, true);
5718   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5719   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5720 
5721   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
5722 
5723   // If this target outline function is not an offload entry, we don't need to
5724   // register it.
5725   if (!IsOffloadEntry)
5726     return;
5727 
5728   // The target region ID is used by the runtime library to identify the current
5729   // target region, so it only has to be unique and not necessarily point to
5730   // anything. It could be the pointer to the outlined function that implements
5731   // the target region, but we aren't using that so that the compiler doesn't
5732   // need to keep that, and could therefore inline the host function if proven
5733   // worthwhile during optimization. In the other hand, if emitting code for the
5734   // device, the ID has to be the function address so that it can retrieved from
5735   // the offloading entry and launched by the runtime library. We also mark the
5736   // outlined function to have external linkage in case we are emitting code for
5737   // the device, because these functions will be entry points to the device.
5738 
5739   if (CGM.getLangOpts().OpenMPIsDevice) {
5740     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
5741     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
5742   } else
5743     OutlinedFnID = new llvm::GlobalVariable(
5744         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
5745         llvm::GlobalValue::PrivateLinkage,
5746         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
5747 
5748   // Register the information for the entry associated with this target region.
5749   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
5750       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
5751       /*Flags=*/0);
5752 }
5753 
5754 /// discard all CompoundStmts intervening between two constructs
5755 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
5756   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
5757     Body = CS->body_front();
5758 
5759   return Body;
5760 }
5761 
5762 /// Emit the number of teams for a target directive.  Inspect the num_teams
5763 /// clause associated with a teams construct combined or closely nested
5764 /// with the target directive.
5765 ///
5766 /// Emit a team of size one for directives such as 'target parallel' that
5767 /// have no associated teams construct.
5768 ///
5769 /// Otherwise, return nullptr.
5770 static llvm::Value *
5771 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
5772                                CodeGenFunction &CGF,
5773                                const OMPExecutableDirective &D) {
5774 
5775   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
5776                                               "teams directive expected to be "
5777                                               "emitted only for the host!");
5778 
5779   auto &Bld = CGF.Builder;
5780 
5781   // If the target directive is combined with a teams directive:
5782   //   Return the value in the num_teams clause, if any.
5783   //   Otherwise, return 0 to denote the runtime default.
5784   if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
5785     if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
5786       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
5787       auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
5788                                          /*IgnoreResultAssign*/ true);
5789       return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
5790                                /*IsSigned=*/true);
5791     }
5792 
5793     // The default value is 0.
5794     return Bld.getInt32(0);
5795   }
5796 
5797   // If the target directive is combined with a parallel directive but not a
5798   // teams directive, start one team.
5799   if (isOpenMPParallelDirective(D.getDirectiveKind()))
5800     return Bld.getInt32(1);
5801 
5802   // If the current target region has a teams region enclosed, we need to get
5803   // the number of teams to pass to the runtime function call. This is done
5804   // by generating the expression in a inlined region. This is required because
5805   // the expression is captured in the enclosing target environment when the
5806   // teams directive is not combined with target.
5807 
5808   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5809 
5810   // FIXME: Accommodate other combined directives with teams when they become
5811   // available.
5812   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
5813           ignoreCompoundStmts(CS.getCapturedStmt()))) {
5814     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
5815       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
5816       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5817       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
5818       return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
5819                                /*IsSigned=*/true);
5820     }
5821 
5822     // If we have an enclosed teams directive but no num_teams clause we use
5823     // the default value 0.
5824     return Bld.getInt32(0);
5825   }
5826 
5827   // No teams associated with the directive.
5828   return nullptr;
5829 }
5830 
5831 /// Emit the number of threads for a target directive.  Inspect the
5832 /// thread_limit clause associated with a teams construct combined or closely
5833 /// nested with the target directive.
5834 ///
5835 /// Emit the num_threads clause for directives such as 'target parallel' that
5836 /// have no associated teams construct.
5837 ///
5838 /// Otherwise, return nullptr.
5839 static llvm::Value *
5840 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
5841                                  CodeGenFunction &CGF,
5842                                  const OMPExecutableDirective &D) {
5843 
5844   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
5845                                               "teams directive expected to be "
5846                                               "emitted only for the host!");
5847 
5848   auto &Bld = CGF.Builder;
5849 
5850   //
5851   // If the target directive is combined with a teams directive:
5852   //   Return the value in the thread_limit clause, if any.
5853   //
5854   // If the target directive is combined with a parallel directive:
5855   //   Return the value in the num_threads clause, if any.
5856   //
5857   // If both clauses are set, select the minimum of the two.
5858   //
5859   // If neither teams or parallel combined directives set the number of threads
5860   // in a team, return 0 to denote the runtime default.
5861   //
5862   // If this is not a teams directive return nullptr.
5863 
5864   if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
5865       isOpenMPParallelDirective(D.getDirectiveKind())) {
5866     llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
5867     llvm::Value *NumThreadsVal = nullptr;
5868     llvm::Value *ThreadLimitVal = nullptr;
5869 
5870     if (const auto *ThreadLimitClause =
5871             D.getSingleClause<OMPThreadLimitClause>()) {
5872       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
5873       auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
5874                                             /*IgnoreResultAssign*/ true);
5875       ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
5876                                          /*IsSigned=*/true);
5877     }
5878 
5879     if (const auto *NumThreadsClause =
5880             D.getSingleClause<OMPNumThreadsClause>()) {
5881       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
5882       llvm::Value *NumThreads =
5883           CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
5884                              /*IgnoreResultAssign*/ true);
5885       NumThreadsVal =
5886           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
5887     }
5888 
5889     // Select the lesser of thread_limit and num_threads.
5890     if (NumThreadsVal)
5891       ThreadLimitVal = ThreadLimitVal
5892                            ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
5893                                                                 ThreadLimitVal),
5894                                               NumThreadsVal, ThreadLimitVal)
5895                            : NumThreadsVal;
5896 
5897     // Set default value passed to the runtime if either teams or a target
5898     // parallel type directive is found but no clause is specified.
5899     if (!ThreadLimitVal)
5900       ThreadLimitVal = DefaultThreadLimitVal;
5901 
5902     return ThreadLimitVal;
5903   }
5904 
5905   // If the current target region has a teams region enclosed, we need to get
5906   // the thread limit to pass to the runtime function call. This is done
5907   // by generating the expression in a inlined region. This is required because
5908   // the expression is captured in the enclosing target environment when the
5909   // teams directive is not combined with target.
5910 
5911   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5912 
5913   // FIXME: Accommodate other combined directives with teams when they become
5914   // available.
5915   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
5916           ignoreCompoundStmts(CS.getCapturedStmt()))) {
5917     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
5918       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
5919       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5920       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
5921       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
5922                                        /*IsSigned=*/true);
5923     }
5924 
5925     // If we have an enclosed teams directive but no thread_limit clause we use
5926     // the default value 0.
5927     return CGF.Builder.getInt32(0);
5928   }
5929 
5930   // No teams associated with the directive.
5931   return nullptr;
5932 }
5933 
5934 namespace {
5935 // \brief Utility to handle information from clauses associated with a given
5936 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
5937 // It provides a convenient interface to obtain the information and generate
5938 // code for that information.
5939 class MappableExprsHandler {
5940 public:
5941   /// \brief Values for bit flags used to specify the mapping type for
5942   /// offloading.
5943   enum OpenMPOffloadMappingFlags {
5944     /// \brief Allocate memory on the device and move data from host to device.
5945     OMP_MAP_TO = 0x01,
5946     /// \brief Allocate memory on the device and move data from device to host.
5947     OMP_MAP_FROM = 0x02,
5948     /// \brief Always perform the requested mapping action on the element, even
5949     /// if it was already mapped before.
5950     OMP_MAP_ALWAYS = 0x04,
5951     /// \brief Delete the element from the device environment, ignoring the
5952     /// current reference count associated with the element.
5953     OMP_MAP_DELETE = 0x08,
5954     /// \brief The element being mapped is a pointer, therefore the pointee
5955     /// should be mapped as well.
5956     OMP_MAP_IS_PTR = 0x10,
5957     /// \brief This flags signals that an argument is the first one relating to
5958     /// a map/private clause expression. For some cases a single
5959     /// map/privatization results in multiple arguments passed to the runtime
5960     /// library.
5961     OMP_MAP_FIRST_REF = 0x20,
5962     /// \brief Signal that the runtime library has to return the device pointer
5963     /// in the current position for the data being mapped.
5964     OMP_MAP_RETURN_PTR = 0x40,
5965     /// \brief This flag signals that the reference being passed is a pointer to
5966     /// private data.
5967     OMP_MAP_PRIVATE_PTR = 0x80,
5968     /// \brief Pass the element to the device by value.
5969     OMP_MAP_PRIVATE_VAL = 0x100,
5970     /// Implicit map
5971     OMP_MAP_IMPLICIT = 0x200,
5972   };
5973 
5974   /// Class that associates information with a base pointer to be passed to the
5975   /// runtime library.
5976   class BasePointerInfo {
5977     /// The base pointer.
5978     llvm::Value *Ptr = nullptr;
5979     /// The base declaration that refers to this device pointer, or null if
5980     /// there is none.
5981     const ValueDecl *DevPtrDecl = nullptr;
5982 
5983   public:
5984     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
5985         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
5986     llvm::Value *operator*() const { return Ptr; }
5987     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
5988     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
5989   };
5990 
5991   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
5992   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
5993   typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
5994 
private:
  /// \brief Directive from which the map, to, from and use_device_ptr clauses
  /// are read when the map information is generated.
  const OMPExecutableDirective &CurDir;

  /// \brief Function the directive is being generated for. All IR emitted by
  /// this handler goes through it.
  CodeGenFunction &CGF;

  /// \brief Set of all first-private variables in the current directive. The
  /// constructor fills it with the canonical declaration of each variable
  /// listed in a firstprivate clause.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null. The constructor fills
  /// it from the component lists of the directive's is_device_ptr clauses.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
6011 
6012   llvm::Value *getExprTypeSize(const Expr *E) const {
6013     auto ExprTy = E->getType().getCanonicalType();
6014 
6015     // Reference types are ignored for mapping purposes.
6016     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
6017       ExprTy = RefTy->getPointeeType().getCanonicalType();
6018 
6019     // Given that an array section is considered a built-in type, we need to
6020     // do the calculation based on the length of the section instead of relying
6021     // on CGF.getTypeSize(E->getType()).
6022     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6023       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6024                             OAE->getBase()->IgnoreParenImpCasts())
6025                             .getCanonicalType();
6026 
6027       // If there is no length associated with the expression, that means we
6028       // are using the whole length of the base.
6029       if (!OAE->getLength() && OAE->getColonLoc().isValid())
6030         return CGF.getTypeSize(BaseTy);
6031 
6032       llvm::Value *ElemSize;
6033       if (auto *PTy = BaseTy->getAs<PointerType>())
6034         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6035       else {
6036         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6037         assert(ATy && "Expecting array type if not a pointer type.");
6038         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6039       }
6040 
6041       // If we don't have a length at this point, that is because we have an
6042       // array section with a single element.
6043       if (!OAE->getLength())
6044         return ElemSize;
6045 
6046       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6047       LengthVal =
6048           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6049       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6050     }
6051     return CGF.getTypeSize(ExprTy);
6052   }
6053 
6054   /// \brief Return the corresponding bits for a given map clause modifier. Add
6055   /// a flag marking the map as a pointer if requested. Add a flag marking the
6056   /// map as the first one of a series of maps that relate to the same map
6057   /// expression.
6058   unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
6059                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
6060                           bool AddIsFirstFlag) const {
6061     unsigned Bits = 0u;
6062     switch (MapType) {
6063     case OMPC_MAP_alloc:
6064     case OMPC_MAP_release:
6065       // alloc and release is the default behavior in the runtime library,  i.e.
6066       // if we don't pass any bits alloc/release that is what the runtime is
6067       // going to do. Therefore, we don't need to signal anything for these two
6068       // type modifiers.
6069       break;
6070     case OMPC_MAP_to:
6071       Bits = OMP_MAP_TO;
6072       break;
6073     case OMPC_MAP_from:
6074       Bits = OMP_MAP_FROM;
6075       break;
6076     case OMPC_MAP_tofrom:
6077       Bits = OMP_MAP_TO | OMP_MAP_FROM;
6078       break;
6079     case OMPC_MAP_delete:
6080       Bits = OMP_MAP_DELETE;
6081       break;
6082     default:
6083       llvm_unreachable("Unexpected map type!");
6084       break;
6085     }
6086     if (AddPtrFlag)
6087       Bits |= OMP_MAP_IS_PTR;
6088     if (AddIsFirstFlag)
6089       Bits |= OMP_MAP_FIRST_REF;
6090     if (MapTypeModifier == OMPC_MAP_always)
6091       Bits |= OMP_MAP_ALWAYS;
6092     return Bits;
6093   }
6094 
6095   /// \brief Return true if the provided expression is a final array section. A
6096   /// final array section, is one whose length can't be proved to be one.
6097   bool isFinalArraySectionExpression(const Expr *E) const {
6098     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6099 
6100     // It is not an array section and therefore not a unity-size one.
6101     if (!OASE)
6102       return false;
6103 
6104     // An array section with no colon always refer to a single element.
6105     if (OASE->getColonLoc().isInvalid())
6106       return false;
6107 
6108     auto *Length = OASE->getLength();
6109 
6110     // If we don't have a length we have to check if the array has size 1
6111     // for this dimension. Also, we should always expect a length if the
6112     // base type is pointer.
6113     if (!Length) {
6114       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6115                          OASE->getBase()->IgnoreParenImpCasts())
6116                          .getCanonicalType();
6117       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6118         return ATy->getSize().getSExtValue() != 1;
6119       // If we don't have a constant dimension length, we have to consider
6120       // the current section as having any size, so it is not necessarily
6121       // unitary. If it happen to be unity size, that's user fault.
6122       return true;
6123     }
6124 
6125     // Check if the length evaluates to 1.
6126     llvm::APSInt ConstLength;
6127     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
6128       return true; // Can have more that size 1.
6129 
6130     return ConstLength.getSExtValue() != 1;
6131   }
6132 
  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  ///
  /// One entry is appended to each of the four output arrays for every
  /// component that is the last one, is a pointer, or is a final array
  /// section; a reference member additionally gets an 'alloc' entry of its
  /// own. The arrays are only appended to, never cleared.
  ///
  /// \param MapType Map type applied to the generated entries.
  /// \param MapTypeModifier Map type modifier applied to the entries.
  /// \param Components Component list of the mapped expression; it is scanned
  /// in reverse, from the base to the complete expression.
  /// \param BasePointers [out] Base pointer of each generated entry.
  /// \param Pointers [out] Section pointer of each generated entry.
  /// \param Sizes [out] Size of each generated entry.
  /// \param Types [out] Map type flags of each generated entry.
  /// \param IsFirstComponentList Should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// \param IsImplicit If true, OMP_MAP_IMPLICIT is added to every entry.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList, bool IsImplicit) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22])
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
    // extra_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;

    // Scan the components from the base to the complete expression. The list
    // is therefore walked with reverse iterators.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    // Base pointer of the entry currently being built; it is updated as the
    // scan moves past pointers and references.
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         Ty->castAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Flags that every entry generated for this component list carries.
    unsigned DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // Section pointer (lower bound) and size of the current component.
        llvm::Value *LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        // If we have a member expression and the current component is a
        // reference, we have to map the reference too. Whenever we have a
        // reference, the section that reference refers to is going to be a
        // load instruction from the storage assigned to the reference.
        if (isa<MemberExpr>(I->getAssociatedExpression()) &&
            I->getAssociatedDeclaration()->getType()->isReferenceType()) {
          auto *LI = cast<llvm::LoadInst>(LB);
          auto *RefAddr = LI->getPointerOperand();

          // The reference storage itself is mapped as 'alloc' with the size
          // of a void pointer.
          BasePointers.push_back(BP);
          Pointers.push_back(RefAddr);
          Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
          Types.push_back(DefaultFlags |
                          getMapTypeBits(
                              /*MapType*/ OMPC_MAP_alloc,
                              /*MapTypeModifier=*/OMPC_MAP_unknown,
                              !IsExpressionFirstInfo, IsCaptureFirstInfo));
          IsExpressionFirstInfo = false;
          IsCaptureFirstInfo = false;
          // The reference will be the next base address.
          BP = RefAddr;
        }

        BasePointers.push_back(BP);
        Pointers.push_back(LB);
        Sizes.push_back(Size);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier,
                                                      !IsExpressionFirstInfo,
                                                      IsCaptureFirstInfo));

        // If we have a final array section, we are done with this expression.
        // Any remaining components are not visited.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
6383 
6384   /// \brief Return the adjusted map modifiers if the declaration a capture
6385   /// refers to appears in a first-private clause. This is expected to be used
6386   /// only with directives that start with 'target'.
6387   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
6388                                                unsigned CurrentModifiers) {
6389     assert(Cap.capturesVariable() && "Expected capture by reference only!");
6390 
6391     // A first private variable captured by reference will use only the
6392     // 'private ptr' and 'map to' flag. Return the right flags if the captured
6393     // declaration is known as first-private in this handler.
6394     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
6395       return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
6396              MappableExprsHandler::OMP_MAP_TO;
6397 
6398     // We didn't modify anything.
6399     return CurrentModifiers;
6400   }
6401 
6402 public:
6403   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
6404       : CurDir(Dir), CGF(CGF) {
6405     // Extract firstprivate clause information.
6406     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
6407       for (const auto *D : C->varlists())
6408         FirstPrivateDecls.insert(
6409             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6410     // Extract device pointer clause information.
6411     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
6412       for (auto L : C->component_lists())
6413         DevPointersMap[L.first].push_back(L.second);
6414   }
6415 
  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the mappable expressions of the map, to, from and
  /// use_device_ptr clauses of the current directive.
  ///
  /// The four output arrays are cleared first. For each item that relates
  /// with a use_device_ptr clause, the relevant declaration is recorded on
  /// the corresponding BasePointers entry and OMP_MAP_RETURN_PTR is added to
  /// its map type.
  ///
  /// \param BasePointers [out] Base pointer of each generated entry.
  /// \param Pointers [out] Section pointer of each generated entry.
  /// \param Sizes [out] Size of each generated entry.
  /// \param Types [out] Map type flags of each generated entry.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // Everything needed to generate the entries of one component list.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
      OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
      ReturnPointerKind ReturnDevicePointer = RPK_None;
      bool IsImplicit = false;

      MapInfo() = default;
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Lists are keyed by the canonical declaration; a null
    // declaration is kept as a null key.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect the component lists of the map clauses, then of the to clauses
    // (as map type 'to') and of the from clauses (as map type 'from').
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None, C->isImplicit());
      }
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section. The entry is emitted right away, so it precedes
        // the entries generated from Info below.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr =
            this->CGF
                .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
                .getScalarVal();
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
      }

    // Generate the entries for all collected component lists, one declaration
    // at a time.
    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
            Sizes, Types, IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        // NOTE(review): this is done only for the first component list of a
        // declaration, while the list marked above is the first one whose
        // last component matches the declaration; a marked list that is not
        // the first one would not get the flag - confirm this is intended.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
        }
        IsFirstComponentList = false;
      }
    }
  }
6580 
6581   /// \brief Generate the base pointers, section pointers, sizes and map types
6582   /// associated to a given capture.
6583   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
6584                               llvm::Value *Arg,
6585                               MapBaseValuesArrayTy &BasePointers,
6586                               MapValuesArrayTy &Pointers,
6587                               MapValuesArrayTy &Sizes,
6588                               MapFlagsArrayTy &Types) const {
6589     assert(!Cap->capturesVariableArrayType() &&
6590            "Not expecting to generate map info for a variable array type!");
6591 
6592     BasePointers.clear();
6593     Pointers.clear();
6594     Sizes.clear();
6595     Types.clear();
6596 
6597     // We need to know when we generating information for the first component
6598     // associated with a capture, because the mapping flags depend on it.
6599     bool IsFirstComponentList = true;
6600 
6601     const ValueDecl *VD =
6602         Cap->capturesThis()
6603             ? nullptr
6604             : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
6605 
6606     // If this declaration appears in a is_device_ptr clause we just have to
6607     // pass the pointer by value. If it is a reference to a declaration, we just
6608     // pass its value, otherwise, if it is a member expression, we need to map
6609     // 'to' the field.
6610     if (!VD) {
6611       auto It = DevPointersMap.find(VD);
6612       if (It != DevPointersMap.end()) {
6613         for (auto L : It->second) {
6614           generateInfoForComponentList(
6615               /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
6616               BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
6617               /*IsImplicit=*/false);
6618           IsFirstComponentList = false;
6619         }
6620         return;
6621       }
6622     } else if (DevPointersMap.count(VD)) {
6623       BasePointers.push_back({Arg, VD});
6624       Pointers.push_back(Arg);
6625       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
6626       Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
6627       return;
6628     }
6629 
6630     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6631     for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
6632       for (auto L : C->decl_component_lists(VD)) {
6633         assert(L.first == VD &&
6634                "We got information for the wrong declaration??");
6635         assert(!L.second.empty() &&
6636                "Not expecting declaration with no component lists.");
6637         generateInfoForComponentList(
6638             C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
6639             Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
6640         IsFirstComponentList = false;
6641       }
6642 
6643     return;
6644   }
6645 
6646   /// \brief Generate the default map information for a given capture \a CI,
6647   /// record field declaration \a RI and captured value \a CV.
6648   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
6649                               const FieldDecl &RI, llvm::Value *CV,
6650                               MapBaseValuesArrayTy &CurBasePointers,
6651                               MapValuesArrayTy &CurPointers,
6652                               MapValuesArrayTy &CurSizes,
6653                               MapFlagsArrayTy &CurMapTypes) {
6654 
6655     // Do the default mapping.
6656     if (CI.capturesThis()) {
6657       CurBasePointers.push_back(CV);
6658       CurPointers.push_back(CV);
6659       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
6660       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
6661       // Default map type.
6662       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
6663     } else if (CI.capturesVariableByCopy()) {
6664       CurBasePointers.push_back(CV);
6665       CurPointers.push_back(CV);
6666       if (!RI.getType()->isAnyPointerType()) {
6667         // We have to signal to the runtime captures passed by value that are
6668         // not pointers.
6669         CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
6670         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
6671       } else {
6672         // Pointers are implicitly mapped with a zero size and no flags
6673         // (other than first map that is added for all implicit maps).
6674         CurMapTypes.push_back(0u);
6675         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
6676       }
6677     } else {
6678       assert(CI.capturesVariable() && "Expected captured reference.");
6679       CurBasePointers.push_back(CV);
6680       CurPointers.push_back(CV);
6681 
6682       const ReferenceType *PtrTy =
6683           cast<ReferenceType>(RI.getType().getTypePtr());
6684       QualType ElementType = PtrTy->getPointeeType();
6685       CurSizes.push_back(CGF.getTypeSize(ElementType));
6686       // The default map type for a scalar/complex type is 'to' because by
6687       // default the value doesn't have to be retrieved. For an aggregate
6688       // type, the default is 'tofrom'.
6689       CurMapTypes.push_back(ElementType->isAggregateType()
6690                                 ? (OMP_MAP_TO | OMP_MAP_FROM)
6691                                 : OMP_MAP_TO);
6692 
6693       // If we have a capture by reference we may need to add the private
6694       // pointer flag if the base declaration shows in some first-private
6695       // clause.
6696       CurMapTypes.back() =
6697           adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
6698     }
6699     // Every default map produces a single argument, so, it is always the
6700     // first one.
6701     CurMapTypes.back() |= OMP_MAP_FIRST_REF;
6702   }
6703 };
6704 
/// \brief Device ID values with a reserved meaning in the offloading runtime
/// calls emitted by this file.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec. This is the value emitted as the
  /// device ID argument when a directive has no device expression.
  OMP_DEVICEID_UNDEF = -1,
};
6710 } // anonymous namespace
6711 
6712 /// \brief Emit the arrays used to pass the captures and map information to the
6713 /// offloading runtime library. If there is no map or capture information,
6714 /// return nullptr by reference.
6715 static void
6716 emitOffloadingArrays(CodeGenFunction &CGF,
6717                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
6718                      MappableExprsHandler::MapValuesArrayTy &Pointers,
6719                      MappableExprsHandler::MapValuesArrayTy &Sizes,
6720                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
6721                      CGOpenMPRuntime::TargetDataInfo &Info) {
6722   auto &CGM = CGF.CGM;
6723   auto &Ctx = CGF.getContext();
6724 
6725   // Reset the array information.
6726   Info.clearArrayInfo();
6727   Info.NumberOfPtrs = BasePointers.size();
6728 
6729   if (Info.NumberOfPtrs) {
6730     // Detect if we have any capture size requiring runtime evaluation of the
6731     // size so that a constant array could be eventually used.
6732     bool hasRuntimeEvaluationCaptureSize = false;
6733     for (auto *S : Sizes)
6734       if (!isa<llvm::Constant>(S)) {
6735         hasRuntimeEvaluationCaptureSize = true;
6736         break;
6737       }
6738 
6739     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
6740     QualType PointerArrayType =
6741         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
6742                                  /*IndexTypeQuals=*/0);
6743 
6744     Info.BasePointersArray =
6745         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
6746     Info.PointersArray =
6747         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
6748 
6749     // If we don't have any VLA types or other types that require runtime
6750     // evaluation, we can use a constant array for the map sizes, otherwise we
6751     // need to fill up the arrays as we do for the pointers.
6752     if (hasRuntimeEvaluationCaptureSize) {
6753       QualType SizeArrayType = Ctx.getConstantArrayType(
6754           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
6755           /*IndexTypeQuals=*/0);
6756       Info.SizesArray =
6757           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
6758     } else {
6759       // We expect all the sizes to be constant, so we collect them to create
6760       // a constant array.
6761       SmallVector<llvm::Constant *, 16> ConstSizes;
6762       for (auto S : Sizes)
6763         ConstSizes.push_back(cast<llvm::Constant>(S));
6764 
6765       auto *SizesArrayInit = llvm::ConstantArray::get(
6766           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
6767       auto *SizesArrayGbl = new llvm::GlobalVariable(
6768           CGM.getModule(), SizesArrayInit->getType(),
6769           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
6770           SizesArrayInit, ".offload_sizes");
6771       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
6772       Info.SizesArray = SizesArrayGbl;
6773     }
6774 
6775     // The map types are always constant so we don't need to generate code to
6776     // fill arrays. Instead, we create an array constant.
6777     llvm::Constant *MapTypesArrayInit =
6778         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
6779     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
6780         CGM.getModule(), MapTypesArrayInit->getType(),
6781         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
6782         MapTypesArrayInit, ".offload_maptypes");
6783     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
6784     Info.MapTypesArray = MapTypesArrayGbl;
6785 
6786     for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
6787       llvm::Value *BPVal = *BasePointers[i];
6788       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
6789           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6790           Info.BasePointersArray, 0, i);
6791       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6792           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
6793       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
6794       CGF.Builder.CreateStore(BPVal, BPAddr);
6795 
6796       if (Info.requiresDevicePointerInfo())
6797         if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
6798           Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
6799 
6800       llvm::Value *PVal = Pointers[i];
6801       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
6802           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6803           Info.PointersArray, 0, i);
6804       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6805           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
6806       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
6807       CGF.Builder.CreateStore(PVal, PAddr);
6808 
6809       if (hasRuntimeEvaluationCaptureSize) {
6810         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
6811             llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
6812             Info.SizesArray,
6813             /*Idx0=*/0,
6814             /*Idx1=*/i);
6815         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
6816         CGF.Builder.CreateStore(
6817             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
6818             SAddr);
6819       }
6820     }
6821   }
6822 }
6823 /// \brief Emit the arguments to be passed to the runtime library based on the
6824 /// arrays of pointers, sizes and map types.
6825 static void emitOffloadingArraysArgument(
6826     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
6827     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
6828     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
6829   auto &CGM = CGF.CGM;
6830   if (Info.NumberOfPtrs) {
6831     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6832         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6833         Info.BasePointersArray,
6834         /*Idx0=*/0, /*Idx1=*/0);
6835     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6836         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6837         Info.PointersArray,
6838         /*Idx0=*/0,
6839         /*Idx1=*/0);
6840     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6841         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
6842         /*Idx0=*/0, /*Idx1=*/0);
6843     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6844         llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
6845         Info.MapTypesArray,
6846         /*Idx0=*/0,
6847         /*Idx1=*/0);
6848   } else {
6849     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
6850     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
6851     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
6852     MapTypesArrayArg =
6853         llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
6854   }
6855 }
6856 
6857 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
6858                                      const OMPExecutableDirective &D,
6859                                      llvm::Value *OutlinedFn,
6860                                      llvm::Value *OutlinedFnID,
6861                                      const Expr *IfCond, const Expr *Device,
6862                                      ArrayRef<llvm::Value *> CapturedVars) {
6863   if (!CGF.HaveInsertPoint())
6864     return;
6865 
6866   assert(OutlinedFn && "Invalid outlined function!");
6867 
6868   // Fill up the arrays with all the captured variables.
6869   MappableExprsHandler::MapValuesArrayTy KernelArgs;
6870   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6871   MappableExprsHandler::MapValuesArrayTy Pointers;
6872   MappableExprsHandler::MapValuesArrayTy Sizes;
6873   MappableExprsHandler::MapFlagsArrayTy MapTypes;
6874 
6875   MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
6876   MappableExprsHandler::MapValuesArrayTy CurPointers;
6877   MappableExprsHandler::MapValuesArrayTy CurSizes;
6878   MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
6879 
6880   // Get mappable expression information.
6881   MappableExprsHandler MEHandler(D, CGF);
6882 
6883   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
6884   auto RI = CS.getCapturedRecordDecl()->field_begin();
6885   auto CV = CapturedVars.begin();
6886   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
6887                                             CE = CS.capture_end();
6888        CI != CE; ++CI, ++RI, ++CV) {
6889     StringRef Name;
6890     QualType Ty;
6891 
6892     CurBasePointers.clear();
6893     CurPointers.clear();
6894     CurSizes.clear();
6895     CurMapTypes.clear();
6896 
6897     // VLA sizes are passed to the outlined region by copy and do not have map
6898     // information associated.
6899     if (CI->capturesVariableArrayType()) {
6900       CurBasePointers.push_back(*CV);
6901       CurPointers.push_back(*CV);
6902       CurSizes.push_back(CGF.getTypeSize(RI->getType()));
6903       // Copy to the device as an argument. No need to retrieve it.
6904       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
6905                             MappableExprsHandler::OMP_MAP_FIRST_REF);
6906     } else {
6907       // If we have any information in the map clause, we use it, otherwise we
6908       // just do a default mapping.
6909       MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
6910                                        CurSizes, CurMapTypes);
6911       if (CurBasePointers.empty())
6912         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
6913                                          CurPointers, CurSizes, CurMapTypes);
6914     }
6915     // We expect to have at least an element of information for this capture.
6916     assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
6917     assert(CurBasePointers.size() == CurPointers.size() &&
6918            CurBasePointers.size() == CurSizes.size() &&
6919            CurBasePointers.size() == CurMapTypes.size() &&
6920            "Inconsistent map information sizes!");
6921 
6922     // The kernel args are always the first elements of the base pointers
6923     // associated with a capture.
6924     KernelArgs.push_back(*CurBasePointers.front());
6925     // We need to append the results of this capture to what we already have.
6926     BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
6927     Pointers.append(CurPointers.begin(), CurPointers.end());
6928     Sizes.append(CurSizes.begin(), CurSizes.end());
6929     MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
6930   }
6931 
6932   // Fill up the pointer arrays and transfer execution to the device.
6933   auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
6934                     OutlinedFn, OutlinedFnID, &D,
6935                     &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {
6936     auto &RT = CGF.CGM.getOpenMPRuntime();
6937     // Emit the offloading arrays.
6938     TargetDataInfo Info;
6939     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6940     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
6941                                  Info.PointersArray, Info.SizesArray,
6942                                  Info.MapTypesArray, Info);
6943 
6944     // On top of the arrays that were filled up, the target offloading call
6945     // takes as arguments the device id as well as the host pointer. The host
6946     // pointer is used by the runtime library to identify the current target
6947     // region, so it only has to be unique and not necessarily point to
6948     // anything. It could be the pointer to the outlined function that
6949     // implements the target region, but we aren't using that so that the
6950     // compiler doesn't need to keep that, and could therefore inline the host
6951     // function if proven worthwhile during optimization.
6952 
6953     // From this point on, we need to have an ID of the target region defined.
6954     assert(OutlinedFnID && "Invalid outlined function ID!");
6955 
6956     // Emit device ID if any.
6957     llvm::Value *DeviceID;
6958     if (Device)
6959       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6960                                            CGF.Int32Ty, /*isSigned=*/true);
6961     else
6962       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6963 
6964     // Emit the number of elements in the offloading arrays.
6965     llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6966 
6967     // Return value of the runtime offloading call.
6968     llvm::Value *Return;
6969 
6970     auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
6971     auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
6972 
6973     // The target region is an outlined function launched by the runtime
6974     // via calls __tgt_target() or __tgt_target_teams().
6975     //
6976     // __tgt_target() launches a target region with one team and one thread,
6977     // executing a serial region.  This master thread may in turn launch
6978     // more threads within its team upon encountering a parallel region,
6979     // however, no additional teams can be launched on the device.
6980     //
6981     // __tgt_target_teams() launches a target region with one or more teams,
6982     // each with one or more threads.  This call is required for target
6983     // constructs such as:
6984     //  'target teams'
6985     //  'target' / 'teams'
6986     //  'target teams distribute parallel for'
6987     //  'target parallel'
6988     // and so on.
6989     //
6990     // Note that on the host and CPU targets, the runtime implementation of
6991     // these calls simply call the outlined function without forking threads.
6992     // The outlined functions themselves have runtime calls to
6993     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
6994     // the compiler in emitTeamsCall() and emitParallelCall().
6995     //
6996     // In contrast, on the NVPTX target, the implementation of
6997     // __tgt_target_teams() launches a GPU kernel with the requested number
6998     // of teams and threads so no additional calls to the runtime are required.
6999     if (NumTeams) {
7000       // If we have NumTeams defined this means that we have an enclosed teams
7001       // region. Therefore we also expect to have NumThreads defined. These two
7002       // values should be defined in the presence of a teams directive,
7003       // regardless of having any clauses associated. If the user is using teams
7004       // but no clauses, these two values will be the default that should be
7005       // passed to the runtime library - a 32-bit integer with the value zero.
7006       assert(NumThreads && "Thread limit expression should be available along "
7007                            "with number of teams.");
7008       llvm::Value *OffloadingArgs[] = {
7009           DeviceID,           OutlinedFnID,
7010           PointerNum,         Info.BasePointersArray,
7011           Info.PointersArray, Info.SizesArray,
7012           Info.MapTypesArray, NumTeams,
7013           NumThreads};
7014       Return = CGF.EmitRuntimeCall(
7015           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
7016     } else {
7017       llvm::Value *OffloadingArgs[] = {
7018           DeviceID,           OutlinedFnID,
7019           PointerNum,         Info.BasePointersArray,
7020           Info.PointersArray, Info.SizesArray,
7021           Info.MapTypesArray};
7022       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
7023                                    OffloadingArgs);
7024     }
7025 
7026     // Check the error code and execute the host version if required.
7027     llvm::BasicBlock *OffloadFailedBlock =
7028         CGF.createBasicBlock("omp_offload.failed");
7029     llvm::BasicBlock *OffloadContBlock =
7030         CGF.createBasicBlock("omp_offload.cont");
7031     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
7032     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
7033 
7034     CGF.EmitBlock(OffloadFailedBlock);
7035     emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
7036     CGF.EmitBranch(OffloadContBlock);
7037 
7038     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
7039   };
7040 
7041   // Notify that the host version must be executed.
7042   auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF,
7043                                                       PrePostActionTy &) {
7044     emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn,
7045                              KernelArgs);
7046   };
7047 
7048   // If we have a target function ID it means that we need to support
7049   // offloading, otherwise, just execute on the host. We need to execute on host
7050   // regardless of the conditional in the if clause if, e.g., the user do not
7051   // specify target triples.
7052   if (OutlinedFnID) {
7053     if (IfCond)
7054       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
7055     else {
7056       RegionCodeGenTy ThenRCG(ThenGen);
7057       ThenRCG(CGF);
7058     }
7059   } else {
7060     RegionCodeGenTy ElseRCG(ElseGen);
7061     ElseRCG(CGF);
7062   }
7063 }
7064 
7065 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
7066                                                     StringRef ParentName) {
7067   if (!S)
7068     return;
7069 
7070   // Codegen OMP target directives that offload compute to the device.
7071   bool requiresDeviceCodegen =
7072       isa<OMPExecutableDirective>(S) &&
7073       isOpenMPTargetExecutionDirective(
7074           cast<OMPExecutableDirective>(S)->getDirectiveKind());
7075 
7076   if (requiresDeviceCodegen) {
7077     auto &E = *cast<OMPExecutableDirective>(S);
7078     unsigned DeviceID;
7079     unsigned FileID;
7080     unsigned Line;
7081     getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
7082                              FileID, Line);
7083 
7084     // Is this a target region that should not be emitted as an entry point? If
7085     // so just signal we are done with this target region.
7086     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
7087                                                             ParentName, Line))
7088       return;
7089 
7090     switch (S->getStmtClass()) {
7091     case Stmt::OMPTargetDirectiveClass:
7092       CodeGenFunction::EmitOMPTargetDeviceFunction(
7093           CGM, ParentName, cast<OMPTargetDirective>(*S));
7094       break;
7095     case Stmt::OMPTargetParallelDirectiveClass:
7096       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7097           CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
7098       break;
7099     case Stmt::OMPTargetTeamsDirectiveClass:
7100       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7101           CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
7102       break;
7103     default:
7104       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
7105     }
7106     return;
7107   }
7108 
7109   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
7110     if (!E->hasAssociatedStmt())
7111       return;
7112 
7113     scanForTargetRegionsFunctions(
7114         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
7115         ParentName);
7116     return;
7117   }
7118 
7119   // If this is a lambda function, look into its body.
7120   if (auto *L = dyn_cast<LambdaExpr>(S))
7121     S = L->getBody();
7122 
7123   // Keep looking for target regions recursively.
7124   for (auto *II : S->children())
7125     scanForTargetRegionsFunctions(II, ParentName);
7126 }
7127 
7128 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
7129   auto &FD = *cast<FunctionDecl>(GD.getDecl());
7130 
7131   // If emitting code for the host, we do not process FD here. Instead we do
7132   // the normal code generation.
7133   if (!CGM.getLangOpts().OpenMPIsDevice)
7134     return false;
7135 
7136   // Try to detect target regions in the function.
7137   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
7138 
7139   // We should not emit any function other that the ones created during the
7140   // scanning. Therefore, we signal that this function is completely dealt
7141   // with.
7142   return true;
7143 }
7144 
7145 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
7146   if (!CGM.getLangOpts().OpenMPIsDevice)
7147     return false;
7148 
7149   // Check if there are Ctors/Dtors in this declaration and look for target
7150   // regions in it. We use the complete variant to produce the kernel name
7151   // mangling.
7152   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
7153   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
7154     for (auto *Ctor : RD->ctors()) {
7155       StringRef ParentName =
7156           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
7157       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
7158     }
7159     auto *Dtor = RD->getDestructor();
7160     if (Dtor) {
7161       StringRef ParentName =
7162           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
7163       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
7164     }
7165   }
7166 
7167   // If we are in target mode, we do not emit any global (declare target is not
7168   // implemented yet). Therefore we signal that GD was processed in this case.
7169   return true;
7170 }
7171 
7172 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
7173   auto *VD = GD.getDecl();
7174   if (isa<FunctionDecl>(VD))
7175     return emitTargetFunctions(GD);
7176 
7177   return emitTargetGlobalVariable(GD);
7178 }
7179 
7180 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
7181   // If we have offloading in the current module, we need to emit the entries
7182   // now and register the offloading descriptor.
7183   createOffloadEntriesAndInfoMetadata();
7184 
7185   // Create and register the offloading binary descriptors. This is the main
7186   // entity that captures all the information about offloading in the current
7187   // compilation unit.
7188   return createOffloadingBinaryDescriptorRegistration();
7189 }
7190 
7191 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
7192                                     const OMPExecutableDirective &D,
7193                                     SourceLocation Loc,
7194                                     llvm::Value *OutlinedFn,
7195                                     ArrayRef<llvm::Value *> CapturedVars) {
7196   if (!CGF.HaveInsertPoint())
7197     return;
7198 
7199   auto *RTLoc = emitUpdateLocation(CGF, Loc);
7200   CodeGenFunction::RunCleanupsScope Scope(CGF);
7201 
7202   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
7203   llvm::Value *Args[] = {
7204       RTLoc,
7205       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
7206       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
7207   llvm::SmallVector<llvm::Value *, 16> RealArgs;
7208   RealArgs.append(std::begin(Args), std::end(Args));
7209   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
7210 
7211   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
7212   CGF.EmitRuntimeCall(RTLFn, RealArgs);
7213 }
7214 
7215 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
7216                                          const Expr *NumTeams,
7217                                          const Expr *ThreadLimit,
7218                                          SourceLocation Loc) {
7219   if (!CGF.HaveInsertPoint())
7220     return;
7221 
7222   auto *RTLoc = emitUpdateLocation(CGF, Loc);
7223 
7224   llvm::Value *NumTeamsVal =
7225       (NumTeams)
7226           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
7227                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7228           : CGF.Builder.getInt32(0);
7229 
7230   llvm::Value *ThreadLimitVal =
7231       (ThreadLimit)
7232           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
7233                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
7234           : CGF.Builder.getInt32(0);
7235 
7236   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
7237   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
7238                                      ThreadLimitVal};
7239   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
7240                       PushNumTeamsArgs);
7241 }
7242 
7243 void CGOpenMPRuntime::emitTargetDataCalls(
7244     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7245     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
7246   if (!CGF.HaveInsertPoint())
7247     return;
7248 
7249   // Action used to replace the default codegen action and turn privatization
7250   // off.
7251   PrePostActionTy NoPrivAction;
7252 
7253   // Generate the code for the opening of the data environment. Capture all the
7254   // arguments of the runtime call by reference because they are used in the
7255   // closing of the region.
7256   auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF,
7257                                                       PrePostActionTy &) {
7258     // Fill up the arrays with all the mapped variables.
7259     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
7260     MappableExprsHandler::MapValuesArrayTy Pointers;
7261     MappableExprsHandler::MapValuesArrayTy Sizes;
7262     MappableExprsHandler::MapFlagsArrayTy MapTypes;
7263 
7264     // Get map clause information.
7265     MappableExprsHandler MCHandler(D, CGF);
7266     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7267 
7268     // Fill up the arrays and create the arguments.
7269     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7270 
7271     llvm::Value *BasePointersArrayArg = nullptr;
7272     llvm::Value *PointersArrayArg = nullptr;
7273     llvm::Value *SizesArrayArg = nullptr;
7274     llvm::Value *MapTypesArrayArg = nullptr;
7275     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7276                                  SizesArrayArg, MapTypesArrayArg, Info);
7277 
7278     // Emit device ID if any.
7279     llvm::Value *DeviceID = nullptr;
7280     if (Device)
7281       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7282                                            CGF.Int32Ty, /*isSigned=*/true);
7283     else
7284       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
7285 
7286     // Emit the number of elements in the offloading arrays.
7287     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7288 
7289     llvm::Value *OffloadingArgs[] = {
7290         DeviceID,         PointerNum,    BasePointersArrayArg,
7291         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7292     auto &RT = CGF.CGM.getOpenMPRuntime();
7293     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
7294                         OffloadingArgs);
7295 
7296     // If device pointer privatization is required, emit the body of the region
7297     // here. It will have to be duplicated: with and without privatization.
7298     if (!Info.CaptureDeviceAddrMap.empty())
7299       CodeGen(CGF);
7300   };
7301 
7302   // Generate code for the closing of the data region.
7303   auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) {
7304     assert(Info.isValid() && "Invalid data environment closing arguments.");
7305 
7306     llvm::Value *BasePointersArrayArg = nullptr;
7307     llvm::Value *PointersArrayArg = nullptr;
7308     llvm::Value *SizesArrayArg = nullptr;
7309     llvm::Value *MapTypesArrayArg = nullptr;
7310     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7311                                  SizesArrayArg, MapTypesArrayArg, Info);
7312 
7313     // Emit device ID if any.
7314     llvm::Value *DeviceID = nullptr;
7315     if (Device)
7316       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7317                                            CGF.Int32Ty, /*isSigned=*/true);
7318     else
7319       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
7320 
7321     // Emit the number of elements in the offloading arrays.
7322     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7323 
7324     llvm::Value *OffloadingArgs[] = {
7325         DeviceID,         PointerNum,    BasePointersArrayArg,
7326         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7327     auto &RT = CGF.CGM.getOpenMPRuntime();
7328     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
7329                         OffloadingArgs);
7330   };
7331 
7332   // If we need device pointer privatization, we need to emit the body of the
7333   // region with no privatization in the 'else' branch of the conditional.
7334   // Otherwise, we don't have to do anything.
7335   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
7336                                                          PrePostActionTy &) {
7337     if (!Info.CaptureDeviceAddrMap.empty()) {
7338       CodeGen.setAction(NoPrivAction);
7339       CodeGen(CGF);
7340     }
7341   };
7342 
7343   // We don't have to do anything to close the region if the if clause evaluates
7344   // to false.
7345   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
7346 
7347   if (IfCond) {
7348     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
7349   } else {
7350     RegionCodeGenTy RCG(BeginThenGen);
7351     RCG(CGF);
7352   }
7353 
7354   // If we don't require privatization of device pointers, we emit the body in
7355   // between the runtime calls. This avoids duplicating the body code.
7356   if (Info.CaptureDeviceAddrMap.empty()) {
7357     CodeGen.setAction(NoPrivAction);
7358     CodeGen(CGF);
7359   }
7360 
7361   if (IfCond) {
7362     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
7363   } else {
7364     RegionCodeGenTy RCG(EndThenGen);
7365     RCG(CGF);
7366   }
7367 }
7368 
7369 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
7370     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7371     const Expr *Device) {
7372   if (!CGF.HaveInsertPoint())
7373     return;
7374 
7375   assert((isa<OMPTargetEnterDataDirective>(D) ||
7376           isa<OMPTargetExitDataDirective>(D) ||
7377           isa<OMPTargetUpdateDirective>(D)) &&
7378          "Expecting either target enter, exit data, or update directives.");
7379 
7380   // Generate the code for the opening of the data environment.
7381   auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
7382     // Fill up the arrays with all the mapped variables.
7383     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
7384     MappableExprsHandler::MapValuesArrayTy Pointers;
7385     MappableExprsHandler::MapValuesArrayTy Sizes;
7386     MappableExprsHandler::MapFlagsArrayTy MapTypes;
7387 
7388     // Get map clause information.
7389     MappableExprsHandler MEHandler(D, CGF);
7390     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7391 
7392     // Fill up the arrays and create the arguments.
7393     TargetDataInfo Info;
7394     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7395     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
7396                                  Info.PointersArray, Info.SizesArray,
7397                                  Info.MapTypesArray, Info);
7398 
7399     // Emit device ID if any.
7400     llvm::Value *DeviceID = nullptr;
7401     if (Device)
7402       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7403                                            CGF.Int32Ty, /*isSigned=*/true);
7404     else
7405       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
7406 
7407     // Emit the number of elements in the offloading arrays.
7408     auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
7409 
7410     llvm::Value *OffloadingArgs[] = {
7411         DeviceID,           PointerNum,      Info.BasePointersArray,
7412         Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
7413 
7414     auto &RT = CGF.CGM.getOpenMPRuntime();
7415     // Select the right runtime function call for each expected standalone
7416     // directive.
7417     OpenMPRTLFunction RTLFn;
7418     switch (D.getDirectiveKind()) {
7419     default:
7420       llvm_unreachable("Unexpected standalone target data directive.");
7421       break;
7422     case OMPD_target_enter_data:
7423       RTLFn = OMPRTL__tgt_target_data_begin;
7424       break;
7425     case OMPD_target_exit_data:
7426       RTLFn = OMPRTL__tgt_target_data_end;
7427       break;
7428     case OMPD_target_update:
7429       RTLFn = OMPRTL__tgt_target_data_update;
7430       break;
7431     }
7432     CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
7433   };
7434 
7435   // In the event we get an if clause, we don't have to take any action on the
7436   // else side.
7437   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
7438 
7439   if (IfCond) {
7440     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
7441   } else {
7442     RegionCodeGenTy ThenGenRCG(ThenGen);
7443     ThenGenRCG(CGF);
7444   }
7445 }
7446 
7447 namespace {
7448   /// Kind of parameter in a function with 'declare simd' directive.
7449   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
7450   /// Attribute set of the parameter.
7451   struct ParamAttrTy {
7452     ParamKindTy Kind = Vector;
7453     llvm::APSInt StrideOrArg;
7454     llvm::APSInt Alignment;
7455   };
7456 } // namespace
7457 
7458 static unsigned evaluateCDTSize(const FunctionDecl *FD,
7459                                 ArrayRef<ParamAttrTy> ParamAttrs) {
7460   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
7461   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
7462   // of that clause. The VLEN value must be power of 2.
7463   // In other case the notion of the function`s "characteristic data type" (CDT)
7464   // is used to compute the vector length.
7465   // CDT is defined in the following order:
7466   //   a) For non-void function, the CDT is the return type.
7467   //   b) If the function has any non-uniform, non-linear parameters, then the
7468   //   CDT is the type of the first such parameter.
7469   //   c) If the CDT determined by a) or b) above is struct, union, or class
7470   //   type which is pass-by-value (except for the type that maps to the
7471   //   built-in complex data type), the characteristic data type is int.
7472   //   d) If none of the above three cases is applicable, the CDT is int.
7473   // The VLEN is then determined based on the CDT and the size of vector
7474   // register of that ISA for which current vector version is generated. The
7475   // VLEN is computed using the formula below:
7476   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
7477   // where vector register size specified in section 3.2.1 Registers and the
7478   // Stack Frame of original AMD64 ABI document.
7479   QualType RetType = FD->getReturnType();
7480   if (RetType.isNull())
7481     return 0;
7482   ASTContext &C = FD->getASTContext();
7483   QualType CDT;
7484   if (!RetType.isNull() && !RetType->isVoidType())
7485     CDT = RetType;
7486   else {
7487     unsigned Offset = 0;
7488     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
7489       if (ParamAttrs[Offset].Kind == Vector)
7490         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
7491       ++Offset;
7492     }
7493     if (CDT.isNull()) {
7494       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
7495         if (ParamAttrs[I + Offset].Kind == Vector) {
7496           CDT = FD->getParamDecl(I)->getType();
7497           break;
7498         }
7499       }
7500     }
7501   }
7502   if (CDT.isNull())
7503     CDT = C.IntTy;
7504   CDT = CDT->getCanonicalTypeUnqualified();
7505   if (CDT->isRecordType() || CDT->isUnionType())
7506     CDT = C.IntTy;
7507   return C.getTypeSize(CDT);
7508 }
7509 
7510 static void
7511 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
7512                            const llvm::APSInt &VLENVal,
7513                            ArrayRef<ParamAttrTy> ParamAttrs,
7514                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
7515   struct ISADataTy {
7516     char ISA;
7517     unsigned VecRegSize;
7518   };
7519   ISADataTy ISAData[] = {
7520       {
7521           'b', 128
7522       }, // SSE
7523       {
7524           'c', 256
7525       }, // AVX
7526       {
7527           'd', 256
7528       }, // AVX2
7529       {
7530           'e', 512
7531       }, // AVX512
7532   };
7533   llvm::SmallVector<char, 2> Masked;
7534   switch (State) {
7535   case OMPDeclareSimdDeclAttr::BS_Undefined:
7536     Masked.push_back('N');
7537     Masked.push_back('M');
7538     break;
7539   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
7540     Masked.push_back('N');
7541     break;
7542   case OMPDeclareSimdDeclAttr::BS_Inbranch:
7543     Masked.push_back('M');
7544     break;
7545   }
7546   for (auto Mask : Masked) {
7547     for (auto &Data : ISAData) {
7548       SmallString<256> Buffer;
7549       llvm::raw_svector_ostream Out(Buffer);
7550       Out << "_ZGV" << Data.ISA << Mask;
7551       if (!VLENVal) {
7552         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
7553                                          evaluateCDTSize(FD, ParamAttrs));
7554       } else
7555         Out << VLENVal;
7556       for (auto &ParamAttr : ParamAttrs) {
7557         switch (ParamAttr.Kind){
7558         case LinearWithVarStride:
7559           Out << 's' << ParamAttr.StrideOrArg;
7560           break;
7561         case Linear:
7562           Out << 'l';
7563           if (!!ParamAttr.StrideOrArg)
7564             Out << ParamAttr.StrideOrArg;
7565           break;
7566         case Uniform:
7567           Out << 'u';
7568           break;
7569         case Vector:
7570           Out << 'v';
7571           break;
7572         }
7573         if (!!ParamAttr.Alignment)
7574           Out << 'a' << ParamAttr.Alignment;
7575       }
7576       Out << '_' << Fn->getName();
7577       Fn->addFnAttr(Out.str());
7578     }
7579   }
7580 }
7581 
7582 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
7583                                               llvm::Function *Fn) {
7584   ASTContext &C = CGM.getContext();
7585   FD = FD->getCanonicalDecl();
7586   // Map params to their positions in function decl.
7587   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
7588   if (isa<CXXMethodDecl>(FD))
7589     ParamPositions.insert({FD, 0});
7590   unsigned ParamPos = ParamPositions.size();
7591   for (auto *P : FD->parameters()) {
7592     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
7593     ++ParamPos;
7594   }
7595   for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
7596     llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
7597     // Mark uniform parameters.
7598     for (auto *E : Attr->uniforms()) {
7599       E = E->IgnoreParenImpCasts();
7600       unsigned Pos;
7601       if (isa<CXXThisExpr>(E))
7602         Pos = ParamPositions[FD];
7603       else {
7604         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7605                         ->getCanonicalDecl();
7606         Pos = ParamPositions[PVD];
7607       }
7608       ParamAttrs[Pos].Kind = Uniform;
7609     }
7610     // Get alignment info.
7611     auto NI = Attr->alignments_begin();
7612     for (auto *E : Attr->aligneds()) {
7613       E = E->IgnoreParenImpCasts();
7614       unsigned Pos;
7615       QualType ParmTy;
7616       if (isa<CXXThisExpr>(E)) {
7617         Pos = ParamPositions[FD];
7618         ParmTy = E->getType();
7619       } else {
7620         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7621                         ->getCanonicalDecl();
7622         Pos = ParamPositions[PVD];
7623         ParmTy = PVD->getType();
7624       }
7625       ParamAttrs[Pos].Alignment =
7626           (*NI) ? (*NI)->EvaluateKnownConstInt(C)
7627                 : llvm::APSInt::getUnsigned(
7628                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
7629                           .getQuantity());
7630       ++NI;
7631     }
7632     // Mark linear parameters.
7633     auto SI = Attr->steps_begin();
7634     auto MI = Attr->modifiers_begin();
7635     for (auto *E : Attr->linears()) {
7636       E = E->IgnoreParenImpCasts();
7637       unsigned Pos;
7638       if (isa<CXXThisExpr>(E))
7639         Pos = ParamPositions[FD];
7640       else {
7641         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7642                         ->getCanonicalDecl();
7643         Pos = ParamPositions[PVD];
7644       }
7645       auto &ParamAttr = ParamAttrs[Pos];
7646       ParamAttr.Kind = Linear;
7647       if (*SI) {
7648         if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
7649                                   Expr::SE_AllowSideEffects)) {
7650           if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
7651             if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
7652               ParamAttr.Kind = LinearWithVarStride;
7653               ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
7654                   ParamPositions[StridePVD->getCanonicalDecl()]);
7655             }
7656           }
7657         }
7658       }
7659       ++SI;
7660       ++MI;
7661     }
7662     llvm::APSInt VLENVal;
7663     if (const Expr *VLEN = Attr->getSimdlen())
7664       VLENVal = VLEN->EvaluateKnownConstInt(C);
7665     OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
7666     if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
7667         CGM.getTriple().getArch() == llvm::Triple::x86_64)
7668       emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
7669   }
7670 }
7671 
7672 namespace {
7673 /// Cleanup action for doacross support.
7674 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
7675 public:
7676   static const int DoacrossFinArgs = 2;
7677 
7678 private:
7679   llvm::Value *RTLFn;
7680   llvm::Value *Args[DoacrossFinArgs];
7681 
7682 public:
7683   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
7684       : RTLFn(RTLFn) {
7685     assert(CallArgs.size() == DoacrossFinArgs);
7686     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
7687   }
7688   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
7689     if (!CGF.HaveInsertPoint())
7690       return;
7691     CGF.EmitRuntimeCall(RTLFn, Args);
7692   }
7693 };
7694 } // namespace
7695 
7696 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
7697                                        const OMPLoopDirective &D) {
7698   if (!CGF.HaveInsertPoint())
7699     return;
7700 
7701   ASTContext &C = CGM.getContext();
7702   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
7703   RecordDecl *RD;
7704   if (KmpDimTy.isNull()) {
7705     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
7706     //  kmp_int64 lo; // lower
7707     //  kmp_int64 up; // upper
7708     //  kmp_int64 st; // stride
7709     // };
7710     RD = C.buildImplicitRecord("kmp_dim");
7711     RD->startDefinition();
7712     addFieldToRecordDecl(C, RD, Int64Ty);
7713     addFieldToRecordDecl(C, RD, Int64Ty);
7714     addFieldToRecordDecl(C, RD, Int64Ty);
7715     RD->completeDefinition();
7716     KmpDimTy = C.getRecordType(RD);
7717   } else
7718     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
7719 
7720   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
7721   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
7722   enum { LowerFD = 0, UpperFD, StrideFD };
7723   // Fill dims with data.
7724   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
7725   // dims.upper = num_iterations;
7726   LValue UpperLVal =
7727       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
7728   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
7729       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
7730       Int64Ty, D.getNumIterations()->getExprLoc());
7731   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
7732   // dims.stride = 1;
7733   LValue StrideLVal =
7734       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
7735   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
7736                         StrideLVal);
7737 
7738   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
7739   // kmp_int32 num_dims, struct kmp_dim * dims);
7740   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
7741                          getThreadID(CGF, D.getLocStart()),
7742                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
7743                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7744                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
7745 
7746   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
7747   CGF.EmitRuntimeCall(RTLFn, Args);
7748   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
7749       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
7750   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
7751   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
7752                                              llvm::makeArrayRef(FiniArgs));
7753 }
7754 
7755 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
7756                                           const OMPDependClause *C) {
7757   QualType Int64Ty =
7758       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
7759   const Expr *CounterVal = C->getCounterValue();
7760   assert(CounterVal);
7761   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
7762                                                  CounterVal->getType(), Int64Ty,
7763                                                  CounterVal->getExprLoc());
7764   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
7765   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
7766   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
7767                          getThreadID(CGF, C->getLocStart()),
7768                          CntAddr.getPointer()};
7769   llvm::Value *RTLFn;
7770   if (C->getDependencyKind() == OMPC_DEPEND_source)
7771     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
7772   else {
7773     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
7774     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
7775   }
7776   CGF.EmitRuntimeCall(RTLFn, Args);
7777 }
7778 
7779 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
7780                                ArrayRef<llvm::Value *> Args,
7781                                SourceLocation Loc) const {
7782   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
7783 
7784   if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
7785     if (Fn->doesNotThrow()) {
7786       CGF.EmitNounwindRuntimeCall(Fn, Args);
7787       return;
7788     }
7789   }
7790   CGF.EmitRuntimeCall(Callee, Args);
7791 }
7792 
/// Emit a call to the outlined function \p OutlinedFn with arguments \p Args.
///
/// \param CGF Function into which the call is emitted.
/// \param Loc Source location of the call; asserted to be valid.
/// \param OutlinedFn Function to call.
/// \param Args Arguments passed to \p OutlinedFn.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  // The actual emission is shared with emitCall().
  emitCall(CGF, OutlinedFn, Args, Loc);
}
7799 
/// Return the address of the local variable for \p NativeParam in \p CGF.
///
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
7805