1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 
30 using namespace clang;
31 using namespace CodeGen;
32 
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50 
51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57 
58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63 
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67 
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70 
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74 
75   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
76 
77   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
78 
79   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
80 
81   bool hasCancel() const { return HasCancel; }
82 
83   static bool classof(const CGCapturedStmtInfo *Info) {
84     return Info->getKind() == CR_OpenMP;
85   }
86 
87   ~CGOpenMPRegionInfo() override = default;
88 
89 protected:
90   CGOpenMPRegionKind RegionKind;
91   RegionCodeGenTy CodeGen;
92   OpenMPDirectiveKind Kind;
93   bool HasCancel;
94 };
95 
96 /// \brief API for captured statement code generation in OpenMP constructs.
97 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
98 public:
99   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
100                              const RegionCodeGenTy &CodeGen,
101                              OpenMPDirectiveKind Kind, bool HasCancel)
102       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
103                            HasCancel),
104         ThreadIDVar(ThreadIDVar) {
105     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
106   }
107 
108   /// \brief Get a variable or parameter for storing global thread id
109   /// inside OpenMP construct.
110   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
111 
112   /// \brief Get the name of the capture helper.
113   StringRef getHelperName() const override { return ".omp_outlined."; }
114 
115   static bool classof(const CGCapturedStmtInfo *Info) {
116     return CGOpenMPRegionInfo::classof(Info) &&
117            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
118                ParallelOutlinedRegion;
119   }
120 
121 private:
122   /// \brief A variable or parameter storing global thread id for OpenMP
123   /// constructs.
124   const VarDecl *ThreadIDVar;
125 };
126 
127 /// \brief API for captured statement code generation in OpenMP constructs.
128 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
129 public:
130   class UntiedTaskActionTy final : public PrePostActionTy {
131     bool Untied;
132     const VarDecl *PartIDVar;
133     const RegionCodeGenTy UntiedCodeGen;
134     llvm::SwitchInst *UntiedSwitch = nullptr;
135 
136   public:
137     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
138                        const RegionCodeGenTy &UntiedCodeGen)
139         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
140     void Enter(CodeGenFunction &CGF) override {
141       if (Untied) {
142         // Emit task switching point.
143         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
144             CGF.GetAddrOfLocalVar(PartIDVar),
145             PartIDVar->getType()->castAs<PointerType>());
146         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
147         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
148         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
149         CGF.EmitBlock(DoneBB);
150         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
151         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
152         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
153                               CGF.Builder.GetInsertBlock());
154         emitUntiedSwitch(CGF);
155       }
156     }
157     void emitUntiedSwitch(CodeGenFunction &CGF) const {
158       if (Untied) {
159         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
163                               PartIdLVal);
164         UntiedCodeGen(CGF);
165         CodeGenFunction::JumpDest CurPoint =
166             CGF.getJumpDestInCurrentScope(".untied.next.");
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
170                               CGF.Builder.GetInsertBlock());
171         CGF.EmitBranchThroughCleanup(CurPoint);
172         CGF.EmitBlock(CurPoint.getBlock());
173       }
174     }
175     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
176   };
177   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
178                                  const VarDecl *ThreadIDVar,
179                                  const RegionCodeGenTy &CodeGen,
180                                  OpenMPDirectiveKind Kind, bool HasCancel,
181                                  const UntiedTaskActionTy &Action)
182       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
183         ThreadIDVar(ThreadIDVar), Action(Action) {
184     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
185   }
186 
187   /// \brief Get a variable or parameter for storing global thread id
188   /// inside OpenMP construct.
189   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
190 
191   /// \brief Get an LValue for the current ThreadID variable.
192   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
193 
194   /// \brief Get the name of the capture helper.
195   StringRef getHelperName() const override { return ".omp_outlined."; }
196 
197   void emitUntiedSwitch(CodeGenFunction &CGF) override {
198     Action.emitUntiedSwitch(CGF);
199   }
200 
201   static bool classof(const CGCapturedStmtInfo *Info) {
202     return CGOpenMPRegionInfo::classof(Info) &&
203            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
204                TaskOutlinedRegion;
205   }
206 
207 private:
208   /// \brief A variable or parameter storing global thread id for OpenMP
209   /// constructs.
210   const VarDecl *ThreadIDVar;
211   /// Action for emitting code for untied tasks.
212   const UntiedTaskActionTy &Action;
213 };
214 
215 /// \brief API for inlined captured statement code generation in OpenMP
216 /// constructs.
217 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
218 public:
219   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
220                             const RegionCodeGenTy &CodeGen,
221                             OpenMPDirectiveKind Kind, bool HasCancel)
222       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
223         OldCSI(OldCSI),
224         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
225 
226   // \brief Retrieve the value of the context parameter.
227   llvm::Value *getContextValue() const override {
228     if (OuterRegionInfo)
229       return OuterRegionInfo->getContextValue();
230     llvm_unreachable("No context value for inlined OpenMP region");
231   }
232 
233   void setContextValue(llvm::Value *V) override {
234     if (OuterRegionInfo) {
235       OuterRegionInfo->setContextValue(V);
236       return;
237     }
238     llvm_unreachable("No context value for inlined OpenMP region");
239   }
240 
241   /// \brief Lookup the captured field decl for a variable.
242   const FieldDecl *lookup(const VarDecl *VD) const override {
243     if (OuterRegionInfo)
244       return OuterRegionInfo->lookup(VD);
245     // If there is no outer outlined region,no need to lookup in a list of
246     // captured variables, we can use the original one.
247     return nullptr;
248   }
249 
250   FieldDecl *getThisFieldDecl() const override {
251     if (OuterRegionInfo)
252       return OuterRegionInfo->getThisFieldDecl();
253     return nullptr;
254   }
255 
256   /// \brief Get a variable or parameter for storing global thread id
257   /// inside OpenMP construct.
258   const VarDecl *getThreadIDVariable() const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->getThreadIDVariable();
261     return nullptr;
262   }
263 
264   /// \brief Get the name of the capture helper.
265   StringRef getHelperName() const override {
266     if (auto *OuterRegionInfo = getOldCSI())
267       return OuterRegionInfo->getHelperName();
268     llvm_unreachable("No helper name for inlined OpenMP construct");
269   }
270 
271   void emitUntiedSwitch(CodeGenFunction &CGF) override {
272     if (OuterRegionInfo)
273       OuterRegionInfo->emitUntiedSwitch(CGF);
274   }
275 
276   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
277 
278   static bool classof(const CGCapturedStmtInfo *Info) {
279     return CGOpenMPRegionInfo::classof(Info) &&
280            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
281   }
282 
283   ~CGOpenMPInlinedRegionInfo() override = default;
284 
285 private:
286   /// \brief CodeGen info about outer OpenMP region.
287   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
288   CGOpenMPRegionInfo *OuterRegionInfo;
289 };
290 
291 /// \brief API for captured statement code generation in OpenMP target
292 /// constructs. For this captures, implicit parameters are used instead of the
293 /// captured fields. The name of the target region has to be unique in a given
294 /// application so it is provided by the client, because only the client has
295 /// the information to generate that.
296 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
297 public:
298   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
299                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
300       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
301                            /*HasCancel=*/false),
302         HelperName(HelperName) {}
303 
304   /// \brief This is unused for target regions because each starts executing
305   /// with a single thread.
306   const VarDecl *getThreadIDVariable() const override { return nullptr; }
307 
308   /// \brief Get the name of the capture helper.
309   StringRef getHelperName() const override { return HelperName; }
310 
311   static bool classof(const CGCapturedStmtInfo *Info) {
312     return CGOpenMPRegionInfo::classof(Info) &&
313            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
314   }
315 
316 private:
317   StringRef HelperName;
318 };
319 
/// \brief Placeholder region code generator that must never be invoked;
/// calling it hits llvm_unreachable. CGOpenMPInnerExprInfo passes it to its
/// base class as the code generation sequence.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
323 /// \brief API for generation of expressions captured in a innermost OpenMP
324 /// region.
325 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
326 public:
327   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
328       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
329                                   OMPD_unknown,
330                                   /*HasCancel=*/false),
331         PrivScope(CGF) {
332     // Make sure the globals captured in the provided statement are local by
333     // using the privatization logic. We assume the same variable is not
334     // captured more than once.
335     for (auto &C : CS.captures()) {
336       if (!C.capturesVariable() && !C.capturesVariableByCopy())
337         continue;
338 
339       const VarDecl *VD = C.getCapturedVar();
340       if (VD->isLocalVarDeclOrParm())
341         continue;
342 
343       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
344                       /*RefersToEnclosingVariableOrCapture=*/false,
345                       VD->getType().getNonReferenceType(), VK_LValue,
346                       SourceLocation());
347       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
348         return CGF.EmitLValue(&DRE).getAddress();
349       });
350     }
351     (void)PrivScope.Privatize();
352   }
353 
354   /// \brief Lookup the captured field decl for a variable.
355   const FieldDecl *lookup(const VarDecl *VD) const override {
356     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
357       return FD;
358     return nullptr;
359   }
360 
361   /// \brief Emit the captured statement body.
362   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
363     llvm_unreachable("No body for expressions");
364   }
365 
366   /// \brief Get a variable or parameter for storing global thread id
367   /// inside OpenMP construct.
368   const VarDecl *getThreadIDVariable() const override {
369     llvm_unreachable("No thread id for expressions");
370   }
371 
372   /// \brief Get the name of the capture helper.
373   StringRef getHelperName() const override {
374     llvm_unreachable("No helper name for expressions");
375   }
376 
377   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
378 
379 private:
380   /// Private scope to capture global variables.
381   CodeGenFunction::OMPPrivateScope PrivScope;
382 };
383 
384 /// \brief RAII for emitting code of OpenMP constructs.
385 class InlinedOpenMPRegionRAII {
386   CodeGenFunction &CGF;
387   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
388   FieldDecl *LambdaThisCaptureField = nullptr;
389 
390 public:
391   /// \brief Constructs region for combined constructs.
392   /// \param CodeGen Code generation sequence for combined directives. Includes
393   /// a list of functions used for code generation of implicitly inlined
394   /// regions.
395   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
396                           OpenMPDirectiveKind Kind, bool HasCancel)
397       : CGF(CGF) {
398     // Start emission for the construct.
399     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
400         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
401     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
402     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
403     CGF.LambdaThisCaptureField = nullptr;
404   }
405 
406   ~InlinedOpenMPRegionRAII() {
407     // Restore original CapturedStmtInfo only if we're done with code emission.
408     auto *OldCSI =
409         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
410     delete CGF.CapturedStmtInfo;
411     CGF.CapturedStmtInfo = OldCSI;
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
414   }
415 };
416 
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeration elements are named and described in accordance with the
/// code from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};
438 
/// \brief Indices of the fields of the ident structure that describes a
/// source location. The enumerators are listed in the order of the fields.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
479 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present (bit 29).
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present (bit 30).
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
509 
/// \brief Identifiers of the runtime library entry points named in this
/// file. The comment on each enumerator gives the prototype of the
/// corresponding runtime function.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
667 
668 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
669 /// region.
670 class CleanupTy final : public EHScopeStack::Cleanup {
671   PrePostActionTy *Action;
672 
673 public:
674   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
675   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
676     if (!CGF.HaveInsertPoint())
677       return;
678     Action->Exit(CGF);
679   }
680 };
681 
682 } // anonymous namespace
683 
684 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
685   CodeGenFunction::RunCleanupsScope Scope(CGF);
686   if (PrePostAction) {
687     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
688     Callback(CodeGen, CGF, *PrePostAction);
689   } else {
690     PrePostActionTy Action;
691     Callback(CodeGen, CGF, Action);
692   }
693 }
694 
695 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
696   return CGF.EmitLoadOfPointerLValue(
697       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
698       getThreadIDVariable()->getType()->castAs<PointerType>());
699 }
700 
701 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
702   if (!CGF.HaveInsertPoint())
703     return;
704   // 1.2.2 OpenMP Language Terminology
705   // Structured block - An executable statement with a single entry at the
706   // top and a single exit at the bottom.
707   // The point of exit cannot be a branch out of the structured block.
708   // longjmp() and throw() must not violate the entry/exit criteria.
709   CGF.EHStack.pushTerminate();
710   CodeGen(CGF);
711   CGF.EHStack.popTerminate();
712 }
713 
714 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
715     CodeGenFunction &CGF) {
716   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
717                             getThreadIDVariable()->getType(),
718                             AlignmentSource::Decl);
719 }
720 
721 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
722     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
723   IdentTy = llvm::StructType::create(
724       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
725       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
726       CGM.Int8PtrTy /* psource */, nullptr);
727   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
728 
729   loadOffloadInfoMetadata();
730 }
731 
732 void CGOpenMPRuntime::clear() {
733   InternalVars.clear();
734 }
735 
736 static llvm::Function *
737 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
738                           const Expr *CombinerInitializer, const VarDecl *In,
739                           const VarDecl *Out, bool IsCombiner) {
740   // void .omp_combiner.(Ty *in, Ty *out);
741   auto &C = CGM.getContext();
742   QualType PtrTy = C.getPointerType(Ty).withRestrict();
743   FunctionArgList Args;
744   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
745                                /*Id=*/nullptr, PtrTy);
746   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
747                               /*Id=*/nullptr, PtrTy);
748   Args.push_back(&OmpOutParm);
749   Args.push_back(&OmpInParm);
750   auto &FnInfo =
751       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
752   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
753   auto *Fn = llvm::Function::Create(
754       FnTy, llvm::GlobalValue::InternalLinkage,
755       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
756   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
757   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
758   CodeGenFunction CGF(CGM);
759   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
760   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
761   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
762   CodeGenFunction::OMPPrivateScope Scope(CGF);
763   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
764   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
765     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
766         .getAddress();
767   });
768   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
769   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
770     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
771         .getAddress();
772   });
773   (void)Scope.Privatize();
774   CGF.EmitIgnoredExpr(CombinerInitializer);
775   Scope.ForceCleanup();
776   CGF.FinishFunction();
777   return Fn;
778 }
779 
780 void CGOpenMPRuntime::emitUserDefinedReduction(
781     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
782   if (UDRMap.count(D) > 0)
783     return;
784   auto &C = CGM.getContext();
785   if (!In || !Out) {
786     In = &C.Idents.get("omp_in");
787     Out = &C.Idents.get("omp_out");
788   }
789   llvm::Function *Combiner = emitCombinerOrInitializer(
790       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
791       cast<VarDecl>(D->lookup(Out).front()),
792       /*IsCombiner=*/true);
793   llvm::Function *Initializer = nullptr;
794   if (auto *Init = D->getInitializer()) {
795     if (!Priv || !Orig) {
796       Priv = &C.Idents.get("omp_priv");
797       Orig = &C.Idents.get("omp_orig");
798     }
799     Initializer = emitCombinerOrInitializer(
800         CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
801         cast<VarDecl>(D->lookup(Priv).front()),
802         /*IsCombiner=*/false);
803   }
804   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
805   if (CGF) {
806     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
807     Decls.second.push_back(D);
808   }
809 }
810 
811 std::pair<llvm::Function *, llvm::Function *>
812 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
813   auto I = UDRMap.find(D);
814   if (I != UDRMap.end())
815     return I->second;
816   emitUserDefinedReduction(/*CGF=*/nullptr, D);
817   return UDRMap.lookup(D);
818 }
819 
820 // Layout information for ident_t.
821 static CharUnits getIdentAlign(CodeGenModule &CGM) {
822   return CGM.getPointerAlign();
823 }
824 static CharUnits getIdentSize(CodeGenModule &CGM) {
825   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
826   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
827 }
828 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
829   // All the fields except the last are i32, so this works beautifully.
830   return unsigned(Field) * CharUnits::fromQuantity(4);
831 }
832 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
833                                    IdentFieldIndex Field,
834                                    const llvm::Twine &Name = "") {
835   auto Offset = getOffsetOfIdentField(Field);
836   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
837 }
838 
839 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
840     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
841     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
842   assert(ThreadIDVar->getType()->isPointerType() &&
843          "thread id variable must be of type kmp_int32 *");
844   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
845   CodeGenFunction CGF(CGM, true);
846   bool HasCancel = false;
847   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
848     HasCancel = OPD->hasCancel();
849   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
850     HasCancel = OPSD->hasCancel();
851   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
852     HasCancel = OPFD->hasCancel();
853   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
854                                     HasCancel);
855   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
856   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
857 }
858 
859 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
860     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
861     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
862     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
863     bool Tied, unsigned &NumberOfParts) {
864   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
865                                               PrePostActionTy &) {
866     auto *ThreadID = getThreadID(CGF, D.getLocStart());
867     auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
868     llvm::Value *TaskArgs[] = {
869         UpLoc, ThreadID,
870         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
871                                     TaskTVar->getType()->castAs<PointerType>())
872             .getPointer()};
873     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
874   };
875   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
876                                                             UntiedCodeGen);
877   CodeGen.setAction(Action);
878   assert(!ThreadIDVar->getType()->isPointerType() &&
879          "thread id variable must be of type kmp_int32 for tasks");
880   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
881   auto *TD = dyn_cast<OMPTaskDirective>(&D);
882   CodeGenFunction CGF(CGM, true);
883   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
884                                         InnermostKind,
885                                         TD ? TD->hasCancel() : false, Action);
886   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
887   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
888   if (!Tied)
889     NumberOfParts = Action.getNumberOfParts();
890   return Res;
891 }
892 
893 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
894   CharUnits Align = getIdentAlign(CGM);
895   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
896   if (!Entry) {
897     if (!DefaultOpenMPPSource) {
898       // Initialize default location for psource field of ident_t structure of
899       // all ident_t objects. Format is ";file;function;line;column;;".
900       // Taken from
901       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
902       DefaultOpenMPPSource =
903           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
904       DefaultOpenMPPSource =
905           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
906     }
907     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
908         CGM.getModule(), IdentTy, /*isConstant*/ true,
909         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
910     DefaultOpenMPLocation->setUnnamedAddr(true);
911     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
912 
913     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
914     llvm::Constant *Values[] = {Zero,
915                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
916                                 Zero, Zero, DefaultOpenMPPSource};
917     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
918     DefaultOpenMPLocation->setInitializer(Init);
919     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
920   }
921   return Address(Entry, Align);
922 }
923 
924 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
925                                                  SourceLocation Loc,
926                                                  unsigned Flags) {
927   Flags |= OMP_IDENT_KMPC;
928   // If no debug info is generated - return global default location.
929   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
930       Loc.isInvalid())
931     return getOrCreateDefaultLocation(Flags).getPointer();
932 
933   assert(CGF.CurFn && "No function in current CodeGenFunction.");
934 
935   Address LocValue = Address::invalid();
936   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
937   if (I != OpenMPLocThreadIDMap.end())
938     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
939 
940   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
941   // GetOpenMPThreadID was called before this routine.
942   if (!LocValue.isValid()) {
943     // Generate "ident_t .kmpc_loc.addr;"
944     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
945                                       ".kmpc_loc.addr");
946     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
947     Elem.second.DebugLoc = AI.getPointer();
948     LocValue = AI;
949 
950     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
951     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
952     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
953                              CGM.getSize(getIdentSize(CGF.CGM)));
954   }
955 
956   // char **psource = &.kmpc_loc_<flags>.addr.psource;
957   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
958 
959   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
960   if (OMPDebugLoc == nullptr) {
961     SmallString<128> Buffer2;
962     llvm::raw_svector_ostream OS2(Buffer2);
963     // Build debug location
964     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
965     OS2 << ";" << PLoc.getFilename() << ";";
966     if (const FunctionDecl *FD =
967             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
968       OS2 << FD->getQualifiedNameAsString();
969     }
970     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
971     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
972     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
973   }
974   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
975   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
976 
977   // Our callers always pass this to a runtime function, so for
978   // convenience, go ahead and return a naked pointer.
979   return LocValue.getPointer();
980 }
981 
982 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
983                                           SourceLocation Loc) {
984   assert(CGF.CurFn && "No function in current CodeGenFunction.");
985 
986   llvm::Value *ThreadID = nullptr;
987   // Check whether we've already cached a load of the thread id in this
988   // function.
989   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
990   if (I != OpenMPLocThreadIDMap.end()) {
991     ThreadID = I->second.ThreadID;
992     if (ThreadID != nullptr)
993       return ThreadID;
994   }
995   if (auto *OMPRegionInfo =
996           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
997     if (OMPRegionInfo->getThreadIDVariable()) {
998       // Check if this an outlined function with thread id passed as argument.
999       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1000       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1001       // If value loaded in entry block, cache it and use it everywhere in
1002       // function.
1003       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1004         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1005         Elem.second.ThreadID = ThreadID;
1006       }
1007       return ThreadID;
1008     }
1009   }
1010 
1011   // This is not an outlined function region - need to call __kmpc_int32
1012   // kmpc_global_thread_num(ident_t *loc).
1013   // Generate thread id value and cache this value for use across the
1014   // function.
1015   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1016   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1017   ThreadID =
1018       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1019                           emitUpdateLocation(CGF, Loc));
1020   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1021   Elem.second.ThreadID = ThreadID;
1022   return ThreadID;
1023 }
1024 
1025 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1026   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1027   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1028     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1029   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1030     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1031       UDRMap.erase(D);
1032     }
1033     FunctionUDRMap.erase(CGF.CurFn);
1034   }
1035 }
1036 
1037 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1038   if (!IdentTy) {
1039   }
1040   return llvm::PointerType::getUnqual(IdentTy);
1041 }
1042 
1043 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1044   if (!Kmpc_MicroTy) {
1045     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1046     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1047                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1048     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1049   }
1050   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1051 }
1052 
1053 llvm::Constant *
1054 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1055   llvm::Constant *RTLFn = nullptr;
1056   switch (static_cast<OpenMPRTLFunction>(Function)) {
1057   case OMPRTL__kmpc_fork_call: {
1058     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1059     // microtask, ...);
1060     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1061                                 getKmpc_MicroPointerTy()};
1062     llvm::FunctionType *FnTy =
1063         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1064     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1065     break;
1066   }
1067   case OMPRTL__kmpc_global_thread_num: {
1068     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1069     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1070     llvm::FunctionType *FnTy =
1071         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1072     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1073     break;
1074   }
1075   case OMPRTL__kmpc_threadprivate_cached: {
1076     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1077     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1078     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1079                                 CGM.VoidPtrTy, CGM.SizeTy,
1080                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1081     llvm::FunctionType *FnTy =
1082         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1083     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1084     break;
1085   }
1086   case OMPRTL__kmpc_critical: {
1087     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1088     // kmp_critical_name *crit);
1089     llvm::Type *TypeParams[] = {
1090         getIdentTyPointerTy(), CGM.Int32Ty,
1091         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1092     llvm::FunctionType *FnTy =
1093         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1094     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1095     break;
1096   }
1097   case OMPRTL__kmpc_critical_with_hint: {
1098     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1099     // kmp_critical_name *crit, uintptr_t hint);
1100     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1101                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1102                                 CGM.IntPtrTy};
1103     llvm::FunctionType *FnTy =
1104         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1105     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1106     break;
1107   }
1108   case OMPRTL__kmpc_threadprivate_register: {
1109     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1110     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1111     // typedef void *(*kmpc_ctor)(void *);
1112     auto KmpcCtorTy =
1113         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1114                                 /*isVarArg*/ false)->getPointerTo();
1115     // typedef void *(*kmpc_cctor)(void *, void *);
1116     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1117     auto KmpcCopyCtorTy =
1118         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1119                                 /*isVarArg*/ false)->getPointerTo();
1120     // typedef void (*kmpc_dtor)(void *);
1121     auto KmpcDtorTy =
1122         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1123             ->getPointerTo();
1124     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1125                               KmpcCopyCtorTy, KmpcDtorTy};
1126     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1127                                         /*isVarArg*/ false);
1128     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1129     break;
1130   }
1131   case OMPRTL__kmpc_end_critical: {
1132     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1133     // kmp_critical_name *crit);
1134     llvm::Type *TypeParams[] = {
1135         getIdentTyPointerTy(), CGM.Int32Ty,
1136         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1137     llvm::FunctionType *FnTy =
1138         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1139     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1140     break;
1141   }
1142   case OMPRTL__kmpc_cancel_barrier: {
1143     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1144     // global_tid);
1145     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1146     llvm::FunctionType *FnTy =
1147         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1148     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1149     break;
1150   }
1151   case OMPRTL__kmpc_barrier: {
1152     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1153     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1154     llvm::FunctionType *FnTy =
1155         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1156     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1157     break;
1158   }
1159   case OMPRTL__kmpc_for_static_fini: {
1160     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1161     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1162     llvm::FunctionType *FnTy =
1163         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1164     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1165     break;
1166   }
1167   case OMPRTL__kmpc_push_num_threads: {
1168     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1169     // kmp_int32 num_threads)
1170     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1171                                 CGM.Int32Ty};
1172     llvm::FunctionType *FnTy =
1173         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1174     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1175     break;
1176   }
1177   case OMPRTL__kmpc_serialized_parallel: {
1178     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1179     // global_tid);
1180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1181     llvm::FunctionType *FnTy =
1182         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1183     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1184     break;
1185   }
1186   case OMPRTL__kmpc_end_serialized_parallel: {
1187     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1188     // global_tid);
1189     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1190     llvm::FunctionType *FnTy =
1191         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1192     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1193     break;
1194   }
1195   case OMPRTL__kmpc_flush: {
1196     // Build void __kmpc_flush(ident_t *loc);
1197     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1198     llvm::FunctionType *FnTy =
1199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1200     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1201     break;
1202   }
1203   case OMPRTL__kmpc_master: {
1204     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1205     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1206     llvm::FunctionType *FnTy =
1207         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1208     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1209     break;
1210   }
1211   case OMPRTL__kmpc_end_master: {
1212     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1213     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1214     llvm::FunctionType *FnTy =
1215         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1216     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1217     break;
1218   }
1219   case OMPRTL__kmpc_omp_taskyield: {
1220     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1221     // int end_part);
1222     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1223     llvm::FunctionType *FnTy =
1224         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1225     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1226     break;
1227   }
1228   case OMPRTL__kmpc_single: {
1229     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1230     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1231     llvm::FunctionType *FnTy =
1232         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1233     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1234     break;
1235   }
1236   case OMPRTL__kmpc_end_single: {
1237     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1238     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1239     llvm::FunctionType *FnTy =
1240         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1241     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1242     break;
1243   }
1244   case OMPRTL__kmpc_omp_task_alloc: {
1245     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1246     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1247     // kmp_routine_entry_t *task_entry);
1248     assert(KmpRoutineEntryPtrTy != nullptr &&
1249            "Type kmp_routine_entry_t must be created.");
1250     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1251                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1252     // Return void * and then cast to particular kmp_task_t type.
1253     llvm::FunctionType *FnTy =
1254         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1255     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1256     break;
1257   }
1258   case OMPRTL__kmpc_omp_task: {
1259     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1260     // *new_task);
1261     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1262                                 CGM.VoidPtrTy};
1263     llvm::FunctionType *FnTy =
1264         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1265     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1266     break;
1267   }
1268   case OMPRTL__kmpc_copyprivate: {
1269     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1270     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1271     // kmp_int32 didit);
1272     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1273     auto *CpyFnTy =
1274         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1275     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1276                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1277                                 CGM.Int32Ty};
1278     llvm::FunctionType *FnTy =
1279         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1280     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1281     break;
1282   }
1283   case OMPRTL__kmpc_reduce: {
1284     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1285     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1286     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1287     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1288     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1289                                                /*isVarArg=*/false);
1290     llvm::Type *TypeParams[] = {
1291         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1292         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1293         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1294     llvm::FunctionType *FnTy =
1295         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1296     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1297     break;
1298   }
1299   case OMPRTL__kmpc_reduce_nowait: {
1300     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1301     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1302     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1303     // *lck);
1304     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1305     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1306                                                /*isVarArg=*/false);
1307     llvm::Type *TypeParams[] = {
1308         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1309         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1310         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1311     llvm::FunctionType *FnTy =
1312         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1313     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1314     break;
1315   }
1316   case OMPRTL__kmpc_end_reduce: {
1317     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1318     // kmp_critical_name *lck);
1319     llvm::Type *TypeParams[] = {
1320         getIdentTyPointerTy(), CGM.Int32Ty,
1321         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1322     llvm::FunctionType *FnTy =
1323         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1324     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1325     break;
1326   }
1327   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
1330     llvm::Type *TypeParams[] = {
1331         getIdentTyPointerTy(), CGM.Int32Ty,
1332         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1333     llvm::FunctionType *FnTy =
1334         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1335     RTLFn =
1336         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1337     break;
1338   }
1339   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1342     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1343                                 CGM.VoidPtrTy};
1344     llvm::FunctionType *FnTy =
1345         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1346     RTLFn =
1347         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1348     break;
1349   }
1350   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1353     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1354                                 CGM.VoidPtrTy};
1355     llvm::FunctionType *FnTy =
1356         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1357     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1358                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1359     break;
1360   }
1361   case OMPRTL__kmpc_ordered: {
1362     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1363     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1364     llvm::FunctionType *FnTy =
1365         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1366     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1367     break;
1368   }
1369   case OMPRTL__kmpc_end_ordered: {
1370     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1371     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1372     llvm::FunctionType *FnTy =
1373         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1374     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1375     break;
1376   }
1377   case OMPRTL__kmpc_omp_taskwait: {
1378     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1379     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1380     llvm::FunctionType *FnTy =
1381         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1382     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1383     break;
1384   }
1385   case OMPRTL__kmpc_taskgroup: {
1386     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1387     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1388     llvm::FunctionType *FnTy =
1389         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1390     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1391     break;
1392   }
1393   case OMPRTL__kmpc_end_taskgroup: {
1394     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1395     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1396     llvm::FunctionType *FnTy =
1397         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1398     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1399     break;
1400   }
1401   case OMPRTL__kmpc_push_proc_bind: {
1402     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1403     // int proc_bind)
1404     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1405     llvm::FunctionType *FnTy =
1406         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1407     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1408     break;
1409   }
1410   case OMPRTL__kmpc_omp_task_with_deps: {
1411     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1412     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1413     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1414     llvm::Type *TypeParams[] = {
1415         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1416         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1417     llvm::FunctionType *FnTy =
1418         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1419     RTLFn =
1420         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1421     break;
1422   }
1423   case OMPRTL__kmpc_omp_wait_deps: {
1424     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1425     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1426     // kmp_depend_info_t *noalias_dep_list);
1427     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1428                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1429                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1430     llvm::FunctionType *FnTy =
1431         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1432     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1433     break;
1434   }
1435   case OMPRTL__kmpc_cancellationpoint: {
1436     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1437     // global_tid, kmp_int32 cncl_kind)
1438     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1439     llvm::FunctionType *FnTy =
1440         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1441     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1442     break;
1443   }
1444   case OMPRTL__kmpc_cancel: {
1445     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1446     // kmp_int32 cncl_kind)
1447     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1448     llvm::FunctionType *FnTy =
1449         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1450     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1451     break;
1452   }
1453   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
1456     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1457         CGM.Int32Ty};
1458     llvm::FunctionType *FnTy =
1459         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1460     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1461     break;
1462   }
1463   case OMPRTL__kmpc_fork_teams: {
1464     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1465     // microtask, ...);
1466     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1467                                 getKmpc_MicroPointerTy()};
1468     llvm::FunctionType *FnTy =
1469         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1471     break;
1472   }
1473   case OMPRTL__kmpc_taskloop: {
1474     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1475     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1476     // sched, kmp_uint64 grainsize, void *task_dup);
1477     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1478                                 CGM.IntTy,
1479                                 CGM.VoidPtrTy,
1480                                 CGM.IntTy,
1481                                 CGM.Int64Ty->getPointerTo(),
1482                                 CGM.Int64Ty->getPointerTo(),
1483                                 CGM.Int64Ty,
1484                                 CGM.IntTy,
1485                                 CGM.IntTy,
1486                                 CGM.Int64Ty,
1487                                 CGM.VoidPtrTy};
1488     llvm::FunctionType *FnTy =
1489         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1490     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1491     break;
1492   }
1493   case OMPRTL__kmpc_doacross_init: {
1494     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1495     // num_dims, struct kmp_dim *dims);
1496     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1497                                 CGM.Int32Ty,
1498                                 CGM.Int32Ty,
1499                                 CGM.VoidPtrTy};
1500     llvm::FunctionType *FnTy =
1501         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1502     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1503     break;
1504   }
1505   case OMPRTL__kmpc_doacross_fini: {
1506     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1507     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1508     llvm::FunctionType *FnTy =
1509         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1510     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1511     break;
1512   }
1513   case OMPRTL__kmpc_doacross_post: {
1514     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1515     // *vec);
1516     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1517                                 CGM.Int64Ty->getPointerTo()};
1518     llvm::FunctionType *FnTy =
1519         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1520     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1521     break;
1522   }
1523   case OMPRTL__kmpc_doacross_wait: {
1524     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1525     // *vec);
1526     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1527                                 CGM.Int64Ty->getPointerTo()};
1528     llvm::FunctionType *FnTy =
1529         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1530     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1531     break;
1532   }
1533   case OMPRTL__tgt_target: {
1534     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1535     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1536     // *arg_types);
1537     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1538                                 CGM.VoidPtrTy,
1539                                 CGM.Int32Ty,
1540                                 CGM.VoidPtrPtrTy,
1541                                 CGM.VoidPtrPtrTy,
1542                                 CGM.SizeTy->getPointerTo(),
1543                                 CGM.Int32Ty->getPointerTo()};
1544     llvm::FunctionType *FnTy =
1545         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1546     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1547     break;
1548   }
1549   case OMPRTL__tgt_target_teams: {
1550     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1551     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1552     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1553     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1554                                 CGM.VoidPtrTy,
1555                                 CGM.Int32Ty,
1556                                 CGM.VoidPtrPtrTy,
1557                                 CGM.VoidPtrPtrTy,
1558                                 CGM.SizeTy->getPointerTo(),
1559                                 CGM.Int32Ty->getPointerTo(),
1560                                 CGM.Int32Ty,
1561                                 CGM.Int32Ty};
1562     llvm::FunctionType *FnTy =
1563         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1564     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1565     break;
1566   }
1567   case OMPRTL__tgt_register_lib: {
1568     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1569     QualType ParamTy =
1570         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1571     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1572     llvm::FunctionType *FnTy =
1573         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1575     break;
1576   }
1577   case OMPRTL__tgt_unregister_lib: {
1578     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1579     QualType ParamTy =
1580         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1581     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1582     llvm::FunctionType *FnTy =
1583         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1584     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1585     break;
1586   }
1587   case OMPRTL__tgt_target_data_begin: {
1588     // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
1589     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1590     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1591                                 CGM.Int32Ty,
1592                                 CGM.VoidPtrPtrTy,
1593                                 CGM.VoidPtrPtrTy,
1594                                 CGM.SizeTy->getPointerTo(),
1595                                 CGM.Int32Ty->getPointerTo()};
1596     llvm::FunctionType *FnTy =
1597         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1598     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
1599     break;
1600   }
1601   case OMPRTL__tgt_target_data_end: {
1602     // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
1603     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1604     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1605                                 CGM.Int32Ty,
1606                                 CGM.VoidPtrPtrTy,
1607                                 CGM.VoidPtrPtrTy,
1608                                 CGM.SizeTy->getPointerTo(),
1609                                 CGM.Int32Ty->getPointerTo()};
1610     llvm::FunctionType *FnTy =
1611         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1612     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
1613     break;
1614   }
1615   case OMPRTL__tgt_target_data_update: {
1616     // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
1617     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1618     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1619                                 CGM.Int32Ty,
1620                                 CGM.VoidPtrPtrTy,
1621                                 CGM.VoidPtrPtrTy,
1622                                 CGM.SizeTy->getPointerTo(),
1623                                 CGM.Int32Ty->getPointerTo()};
1624     llvm::FunctionType *FnTy =
1625         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1626     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
1627     break;
1628   }
1629   }
1630   assert(RTLFn && "Unable to find OpenMP runtime function");
1631   return RTLFn;
1632 }
1633 
1634 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1635                                                              bool IVSigned) {
1636   assert((IVSize == 32 || IVSize == 64) &&
1637          "IV size is not compatible with the omp runtime");
1638   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1639                                        : "__kmpc_for_static_init_4u")
1640                            : (IVSigned ? "__kmpc_for_static_init_8"
1641                                        : "__kmpc_for_static_init_8u");
1642   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1643   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1644   llvm::Type *TypeParams[] = {
1645     getIdentTyPointerTy(),                     // loc
1646     CGM.Int32Ty,                               // tid
1647     CGM.Int32Ty,                               // schedtype
1648     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1649     PtrTy,                                     // p_lower
1650     PtrTy,                                     // p_upper
1651     PtrTy,                                     // p_stride
1652     ITy,                                       // incr
1653     ITy                                        // chunk
1654   };
1655   llvm::FunctionType *FnTy =
1656       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1657   return CGM.CreateRuntimeFunction(FnTy, Name);
1658 }
1659 
1660 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1661                                                             bool IVSigned) {
1662   assert((IVSize == 32 || IVSize == 64) &&
1663          "IV size is not compatible with the omp runtime");
1664   auto Name =
1665       IVSize == 32
1666           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1667           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1668   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1669   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1670                                CGM.Int32Ty,           // tid
1671                                CGM.Int32Ty,           // schedtype
1672                                ITy,                   // lower
1673                                ITy,                   // upper
1674                                ITy,                   // stride
1675                                ITy                    // chunk
1676   };
1677   llvm::FunctionType *FnTy =
1678       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1679   return CGM.CreateRuntimeFunction(FnTy, Name);
1680 }
1681 
1682 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1683                                                             bool IVSigned) {
1684   assert((IVSize == 32 || IVSize == 64) &&
1685          "IV size is not compatible with the omp runtime");
1686   auto Name =
1687       IVSize == 32
1688           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1689           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1690   llvm::Type *TypeParams[] = {
1691       getIdentTyPointerTy(), // loc
1692       CGM.Int32Ty,           // tid
1693   };
1694   llvm::FunctionType *FnTy =
1695       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1696   return CGM.CreateRuntimeFunction(FnTy, Name);
1697 }
1698 
1699 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1700                                                             bool IVSigned) {
1701   assert((IVSize == 32 || IVSize == 64) &&
1702          "IV size is not compatible with the omp runtime");
1703   auto Name =
1704       IVSize == 32
1705           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1706           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1707   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1708   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1709   llvm::Type *TypeParams[] = {
1710     getIdentTyPointerTy(),                     // loc
1711     CGM.Int32Ty,                               // tid
1712     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1713     PtrTy,                                     // p_lower
1714     PtrTy,                                     // p_upper
1715     PtrTy                                      // p_stride
1716   };
1717   llvm::FunctionType *FnTy =
1718       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1719   return CGM.CreateRuntimeFunction(FnTy, Name);
1720 }
1721 
1722 llvm::Constant *
1723 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1724   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1725          !CGM.getContext().getTargetInfo().isTLSSupported());
1726   // Lookup the entry, lazily creating it if necessary.
1727   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1728                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1729 }
1730 
1731 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1732                                                 const VarDecl *VD,
1733                                                 Address VDAddr,
1734                                                 SourceLocation Loc) {
1735   if (CGM.getLangOpts().OpenMPUseTLS &&
1736       CGM.getContext().getTargetInfo().isTLSSupported())
1737     return VDAddr;
1738 
1739   auto VarTy = VDAddr.getElementType();
1740   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1741                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1742                                                        CGM.Int8PtrTy),
1743                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1744                          getOrCreateThreadPrivateCache(VD)};
1745   return Address(CGF.EmitRuntimeCall(
1746       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1747                  VDAddr.getAlignment());
1748 }
1749 
1750 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1751     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1752     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1753   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1754   // library.
1755   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1756   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1757                       OMPLoc);
1758   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1759   // to register constructor/destructor for variable.
1760   llvm::Value *Args[] = {OMPLoc,
1761                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1762                                                        CGM.VoidPtrTy),
1763                          Ctor, CopyCtor, Dtor};
1764   CGF.EmitRuntimeCall(
1765       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1766 }
1767 
1768 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1769     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1770     bool PerformInit, CodeGenFunction *CGF) {
1771   if (CGM.getLangOpts().OpenMPUseTLS &&
1772       CGM.getContext().getTargetInfo().isTLSSupported())
1773     return nullptr;
1774 
1775   VD = VD->getDefinition(CGM.getContext());
1776   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1777     ThreadPrivateWithDefinition.insert(VD);
1778     QualType ASTTy = VD->getType();
1779 
1780     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1781     auto Init = VD->getAnyInitializer();
1782     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1783       // Generate function that re-emits the declaration's initializer into the
1784       // threadprivate copy of the variable VD
1785       CodeGenFunction CtorCGF(CGM);
1786       FunctionArgList Args;
1787       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1788                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1789       Args.push_back(&Dst);
1790 
1791       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1792           CGM.getContext().VoidPtrTy, Args);
1793       auto FTy = CGM.getTypes().GetFunctionType(FI);
1794       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1795           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1796       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1797                             Args, SourceLocation());
1798       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1799           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1800           CGM.getContext().VoidPtrTy, Dst.getLocation());
1801       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1802       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1803                                              CtorCGF.ConvertTypeForMem(ASTTy));
1804       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1805                                /*IsInitializer=*/true);
1806       ArgVal = CtorCGF.EmitLoadOfScalar(
1807           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1808           CGM.getContext().VoidPtrTy, Dst.getLocation());
1809       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1810       CtorCGF.FinishFunction();
1811       Ctor = Fn;
1812     }
1813     if (VD->getType().isDestructedType() != QualType::DK_none) {
1814       // Generate function that emits destructor call for the threadprivate copy
1815       // of the variable VD
1816       CodeGenFunction DtorCGF(CGM);
1817       FunctionArgList Args;
1818       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1819                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1820       Args.push_back(&Dst);
1821 
1822       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1823           CGM.getContext().VoidTy, Args);
1824       auto FTy = CGM.getTypes().GetFunctionType(FI);
1825       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1826           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1827       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1828       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1829                             SourceLocation());
1830       // Create a scope with an artificial location for the body of this function.
1831       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1832       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1833           DtorCGF.GetAddrOfLocalVar(&Dst),
1834           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1835       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1836                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1837                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1838       DtorCGF.FinishFunction();
1839       Dtor = Fn;
1840     }
1841     // Do not emit init function if it is not required.
1842     if (!Ctor && !Dtor)
1843       return nullptr;
1844 
1845     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1846     auto CopyCtorTy =
1847         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1848                                 /*isVarArg=*/false)->getPointerTo();
1849     // Copying constructor for the threadprivate variable.
1850     // Must be NULL - reserved by runtime, but currently it requires that this
1851     // parameter is always NULL. Otherwise it fires assertion.
1852     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1853     if (Ctor == nullptr) {
1854       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1855                                             /*isVarArg=*/false)->getPointerTo();
1856       Ctor = llvm::Constant::getNullValue(CtorTy);
1857     }
1858     if (Dtor == nullptr) {
1859       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1860                                             /*isVarArg=*/false)->getPointerTo();
1861       Dtor = llvm::Constant::getNullValue(DtorTy);
1862     }
1863     if (!CGF) {
1864       auto InitFunctionTy =
1865           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1866       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1867           InitFunctionTy, ".__omp_threadprivate_init_.",
1868           CGM.getTypes().arrangeNullaryFunction());
1869       CodeGenFunction InitCGF(CGM);
1870       FunctionArgList ArgList;
1871       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1872                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1873                             Loc);
1874       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1875       InitCGF.FinishFunction();
1876       return InitFunction;
1877     }
1878     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1879   }
1880   return nullptr;
1881 }
1882 
1883 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1884 /// function. Here is the logic:
1885 /// if (Cond) {
1886 ///   ThenGen();
1887 /// } else {
1888 ///   ElseGen();
1889 /// }
1890 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1891                             const RegionCodeGenTy &ThenGen,
1892                             const RegionCodeGenTy &ElseGen) {
1893   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1894 
1895   // If the condition constant folds and can be elided, try to avoid emitting
1896   // the condition and the dead arm of the if/else.
1897   bool CondConstant;
1898   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1899     if (CondConstant)
1900       ThenGen(CGF);
1901     else
1902       ElseGen(CGF);
1903     return;
1904   }
1905 
1906   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1907   // emit the conditional branch.
1908   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1909   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1910   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1911   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1912 
1913   // Emit the 'then' code.
1914   CGF.EmitBlock(ThenBlock);
1915   ThenGen(CGF);
1916   CGF.EmitBranch(ContBlock);
1917   // Emit the 'else' code if present.
1918   // There is no need to emit line number for unconditional branch.
1919   (void)ApplyDebugLocation::CreateEmpty(CGF);
1920   CGF.EmitBlock(ElseBlock);
1921   ElseGen(CGF);
1922   // There is no need to emit line number for unconditional branch.
1923   (void)ApplyDebugLocation::CreateEmpty(CGF);
1924   CGF.EmitBranch(ContBlock);
1925   // Emit the continuation block for code after the if.
1926   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1927 }
1928 
1929 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1930                                        llvm::Value *OutlinedFn,
1931                                        ArrayRef<llvm::Value *> CapturedVars,
1932                                        const Expr *IfCond) {
1933   if (!CGF.HaveInsertPoint())
1934     return;
1935   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1936   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1937                                                      PrePostActionTy &) {
1938     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1939     auto &RT = CGF.CGM.getOpenMPRuntime();
1940     llvm::Value *Args[] = {
1941         RTLoc,
1942         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1943         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1944     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1945     RealArgs.append(std::begin(Args), std::end(Args));
1946     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1947 
1948     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1949     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1950   };
1951   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1952                                                           PrePostActionTy &) {
1953     auto &RT = CGF.CGM.getOpenMPRuntime();
1954     auto ThreadID = RT.getThreadID(CGF, Loc);
1955     // Build calls:
1956     // __kmpc_serialized_parallel(&Loc, GTid);
1957     llvm::Value *Args[] = {RTLoc, ThreadID};
1958     CGF.EmitRuntimeCall(
1959         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1960 
1961     // OutlinedFn(&GTid, &zero, CapturedStruct);
1962     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1963     Address ZeroAddr =
1964         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1965                              /*Name*/ ".zero.addr");
1966     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1967     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1968     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1969     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1970     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1971     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1972 
1973     // __kmpc_end_serialized_parallel(&Loc, GTid);
1974     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1975     CGF.EmitRuntimeCall(
1976         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1977         EndArgs);
1978   };
1979   if (IfCond)
1980     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1981   else {
1982     RegionCodeGenTy ThenRCG(ThenGen);
1983     ThenRCG(CGF);
1984   }
1985 }
1986 
1987 // If we're inside an (outlined) parallel region, use the region info's
1988 // thread-ID variable (it is passed in a first argument of the outlined function
1989 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1990 // regular serial code region, get thread ID by calling kmp_int32
1991 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1992 // return the address of that temp.
1993 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1994                                              SourceLocation Loc) {
1995   if (auto *OMPRegionInfo =
1996           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1997     if (OMPRegionInfo->getThreadIDVariable())
1998       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1999 
2000   auto ThreadID = getThreadID(CGF, Loc);
2001   auto Int32Ty =
2002       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2003   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2004   CGF.EmitStoreOfScalar(ThreadID,
2005                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2006 
2007   return ThreadIDTemp;
2008 }
2009 
2010 llvm::Constant *
2011 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2012                                              const llvm::Twine &Name) {
2013   SmallString<256> Buffer;
2014   llvm::raw_svector_ostream Out(Buffer);
2015   Out << Name;
2016   auto RuntimeName = Out.str();
2017   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2018   if (Elem.second) {
2019     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2020            "OMP internal variable has different type than requested");
2021     return &*Elem.second;
2022   }
2023 
2024   return Elem.second = new llvm::GlobalVariable(
2025              CGM.getModule(), Ty, /*IsConstant*/ false,
2026              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2027              Elem.first());
2028 }
2029 
2030 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2031   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2032   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2033 }
2034 
2035 namespace {
2036 /// Common pre(post)-action for different OpenMP constructs.
2037 class CommonActionTy final : public PrePostActionTy {
2038   llvm::Value *EnterCallee;
2039   ArrayRef<llvm::Value *> EnterArgs;
2040   llvm::Value *ExitCallee;
2041   ArrayRef<llvm::Value *> ExitArgs;
2042   bool Conditional;
2043   llvm::BasicBlock *ContBlock = nullptr;
2044 
2045 public:
2046   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2047                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2048                  bool Conditional = false)
2049       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2050         ExitArgs(ExitArgs), Conditional(Conditional) {}
2051   void Enter(CodeGenFunction &CGF) override {
2052     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2053     if (Conditional) {
2054       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2055       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2056       ContBlock = CGF.createBasicBlock("omp_if.end");
2057       // Generate the branch (If-stmt)
2058       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2059       CGF.EmitBlock(ThenBlock);
2060     }
2061   }
2062   void Done(CodeGenFunction &CGF) {
2063     // Emit the rest of blocks/branches
2064     CGF.EmitBranch(ContBlock);
2065     CGF.EmitBlock(ContBlock, true);
2066   }
2067   void Exit(CodeGenFunction &CGF) override {
2068     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2069   }
2070 };
2071 } // anonymous namespace
2072 
2073 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2074                                          StringRef CriticalName,
2075                                          const RegionCodeGenTy &CriticalOpGen,
2076                                          SourceLocation Loc, const Expr *Hint) {
2077   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2078   // CriticalOpGen();
2079   // __kmpc_end_critical(ident_t *, gtid, Lock);
2080   // Prepare arguments and build a call to __kmpc_critical
2081   if (!CGF.HaveInsertPoint())
2082     return;
2083   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2084                          getCriticalRegionLock(CriticalName)};
2085   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2086                                                 std::end(Args));
2087   if (Hint) {
2088     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2089         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2090   }
2091   CommonActionTy Action(
2092       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2093                                  : OMPRTL__kmpc_critical),
2094       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2095   CriticalOpGen.setAction(Action);
2096   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2097 }
2098 
2099 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2100                                        const RegionCodeGenTy &MasterOpGen,
2101                                        SourceLocation Loc) {
2102   if (!CGF.HaveInsertPoint())
2103     return;
2104   // if(__kmpc_master(ident_t *, gtid)) {
2105   //   MasterOpGen();
2106   //   __kmpc_end_master(ident_t *, gtid);
2107   // }
2108   // Prepare arguments and build a call to __kmpc_master
2109   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2110   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2111                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2112                         /*Conditional=*/true);
2113   MasterOpGen.setAction(Action);
2114   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2115   Action.Done(CGF);
2116 }
2117 
2118 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2119                                         SourceLocation Loc) {
2120   if (!CGF.HaveInsertPoint())
2121     return;
2122   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2123   llvm::Value *Args[] = {
2124       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2125       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2126   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2127   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2128     Region->emitUntiedSwitch(CGF);
2129 }
2130 
2131 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2132                                           const RegionCodeGenTy &TaskgroupOpGen,
2133                                           SourceLocation Loc) {
2134   if (!CGF.HaveInsertPoint())
2135     return;
2136   // __kmpc_taskgroup(ident_t *, gtid);
2137   // TaskgroupOpGen();
2138   // __kmpc_end_taskgroup(ident_t *, gtid);
2139   // Prepare arguments and build a call to __kmpc_taskgroup
2140   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2141   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2142                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2143                         Args);
2144   TaskgroupOpGen.setAction(Action);
2145   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2146 }
2147 
2148 /// Given an array of pointers to variables, project the address of a
2149 /// given variable.
2150 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2151                                       unsigned Index, const VarDecl *Var) {
2152   // Pull out the pointer to the variable.
2153   Address PtrAddr =
2154       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2155   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2156 
2157   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2158   Addr = CGF.Builder.CreateElementBitCast(
2159       Addr, CGF.ConvertTypeForMem(Var->getType()));
2160   return Addr;
2161 }
2162 
2163 static llvm::Value *emitCopyprivateCopyFunction(
2164     CodeGenModule &CGM, llvm::Type *ArgsType,
2165     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2166     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2167   auto &C = CGM.getContext();
2168   // void copy_func(void *LHSArg, void *RHSArg);
2169   FunctionArgList Args;
2170   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2171                            C.VoidPtrTy);
2172   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2173                            C.VoidPtrTy);
2174   Args.push_back(&LHSArg);
2175   Args.push_back(&RHSArg);
2176   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2177   auto *Fn = llvm::Function::Create(
2178       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2179       ".omp.copyprivate.copy_func", &CGM.getModule());
2180   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2181   CodeGenFunction CGF(CGM);
2182   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2183   // Dest = (void*[n])(LHSArg);
2184   // Src = (void*[n])(RHSArg);
2185   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2186       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2187       ArgsType), CGF.getPointerAlign());
2188   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2189       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2190       ArgsType), CGF.getPointerAlign());
2191   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2192   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2193   // ...
2194   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2195   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2196     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2197     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2198 
2199     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2200     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2201 
2202     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2203     QualType Type = VD->getType();
2204     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2205   }
2206   CGF.FinishFunction();
2207   return Fn;
2208 }
2209 
2210 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2211                                        const RegionCodeGenTy &SingleOpGen,
2212                                        SourceLocation Loc,
2213                                        ArrayRef<const Expr *> CopyprivateVars,
2214                                        ArrayRef<const Expr *> SrcExprs,
2215                                        ArrayRef<const Expr *> DstExprs,
2216                                        ArrayRef<const Expr *> AssignmentOps) {
2217   if (!CGF.HaveInsertPoint())
2218     return;
2219   assert(CopyprivateVars.size() == SrcExprs.size() &&
2220          CopyprivateVars.size() == DstExprs.size() &&
2221          CopyprivateVars.size() == AssignmentOps.size());
2222   auto &C = CGM.getContext();
2223   // int32 did_it = 0;
2224   // if(__kmpc_single(ident_t *, gtid)) {
2225   //   SingleOpGen();
2226   //   __kmpc_end_single(ident_t *, gtid);
2227   //   did_it = 1;
2228   // }
2229   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2230   // <copy_func>, did_it);
2231 
2232   Address DidIt = Address::invalid();
2233   if (!CopyprivateVars.empty()) {
2234     // int32 did_it = 0;
2235     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2236     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2237     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2238   }
2239   // Prepare arguments and build a call to __kmpc_single
2240   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2241   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2242                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2243                         /*Conditional=*/true);
2244   SingleOpGen.setAction(Action);
2245   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2246   if (DidIt.isValid()) {
2247     // did_it = 1;
2248     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2249   }
2250   Action.Done(CGF);
2251   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2252   // <copy_func>, did_it);
2253   if (DidIt.isValid()) {
2254     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2255     auto CopyprivateArrayTy =
2256         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2257                                /*IndexTypeQuals=*/0);
2258     // Create a list of all private variables for copyprivate.
2259     Address CopyprivateList =
2260         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2261     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2262       Address Elem = CGF.Builder.CreateConstArrayGEP(
2263           CopyprivateList, I, CGF.getPointerSize());
2264       CGF.Builder.CreateStore(
2265           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2266               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2267           Elem);
2268     }
2269     // Build function that copies private values from single region to all other
2270     // threads in the corresponding parallel region.
2271     auto *CpyFn = emitCopyprivateCopyFunction(
2272         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2273         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2274     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2275     Address CL =
2276       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2277                                                       CGF.VoidPtrTy);
2278     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2279     llvm::Value *Args[] = {
2280         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2281         getThreadID(CGF, Loc),        // i32 <gtid>
2282         BufSize,                      // size_t <buf_size>
2283         CL.getPointer(),              // void *<copyprivate list>
2284         CpyFn,                        // void (*) (void *, void *) <copy_func>
2285         DidItVal                      // i32 did_it
2286     };
2287     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2288   }
2289 }
2290 
2291 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2292                                         const RegionCodeGenTy &OrderedOpGen,
2293                                         SourceLocation Loc, bool IsThreads) {
2294   if (!CGF.HaveInsertPoint())
2295     return;
2296   // __kmpc_ordered(ident_t *, gtid);
2297   // OrderedOpGen();
2298   // __kmpc_end_ordered(ident_t *, gtid);
2299   // Prepare arguments and build a call to __kmpc_ordered
2300   if (IsThreads) {
2301     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2302     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2303                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2304                           Args);
2305     OrderedOpGen.setAction(Action);
2306     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307     return;
2308   }
2309   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2310 }
2311 
2312 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2313                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2314                                       bool ForceSimpleCall) {
2315   if (!CGF.HaveInsertPoint())
2316     return;
2317   // Build call __kmpc_cancel_barrier(loc, thread_id);
2318   // Build call __kmpc_barrier(loc, thread_id);
2319   unsigned Flags;
2320   if (Kind == OMPD_for)
2321     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2322   else if (Kind == OMPD_sections)
2323     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2324   else if (Kind == OMPD_single)
2325     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2326   else if (Kind == OMPD_barrier)
2327     Flags = OMP_IDENT_BARRIER_EXPL;
2328   else
2329     Flags = OMP_IDENT_BARRIER_IMPL;
2330   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2331   // thread_id);
2332   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2333                          getThreadID(CGF, Loc)};
2334   if (auto *OMPRegionInfo =
2335           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2336     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2337       auto *Result = CGF.EmitRuntimeCall(
2338           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2339       if (EmitChecks) {
2340         // if (__kmpc_cancel_barrier()) {
2341         //   exit from construct;
2342         // }
2343         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2344         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2345         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2346         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2347         CGF.EmitBlock(ExitBB);
2348         //   exit from construct;
2349         auto CancelDestination =
2350             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2351         CGF.EmitBranchThroughCleanup(CancelDestination);
2352         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2353       }
2354       return;
2355     }
2356   }
2357   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2358 }
2359 
2360 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2361 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2362                                           bool Chunked, bool Ordered) {
2363   switch (ScheduleKind) {
2364   case OMPC_SCHEDULE_static:
2365     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2366                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2367   case OMPC_SCHEDULE_dynamic:
2368     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2369   case OMPC_SCHEDULE_guided:
2370     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2371   case OMPC_SCHEDULE_runtime:
2372     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2373   case OMPC_SCHEDULE_auto:
2374     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2375   case OMPC_SCHEDULE_unknown:
2376     assert(!Chunked && "chunk was specified but schedule kind not known");
2377     return Ordered ? OMP_ord_static : OMP_sch_static;
2378   }
2379   llvm_unreachable("Unexpected runtime schedule");
2380 }
2381 
2382 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2383 static OpenMPSchedType
2384 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2385   // only static is allowed for dist_schedule
2386   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2387 }
2388 
2389 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2390                                          bool Chunked) const {
2391   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2392   return Schedule == OMP_sch_static;
2393 }
2394 
2395 bool CGOpenMPRuntime::isStaticNonchunked(
2396     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2397   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2398   return Schedule == OMP_dist_sch_static;
2399 }
2400 
2401 
2402 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2403   auto Schedule =
2404       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2405   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2406   return Schedule != OMP_sch_static;
2407 }
2408 
2409 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2410                                   OpenMPScheduleClauseModifier M1,
2411                                   OpenMPScheduleClauseModifier M2) {
2412   switch (M1) {
2413   case OMPC_SCHEDULE_MODIFIER_monotonic:
2414     return Schedule | OMP_sch_modifier_monotonic;
2415   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2416     return Schedule | OMP_sch_modifier_nonmonotonic;
2417   case OMPC_SCHEDULE_MODIFIER_simd:
2418   case OMPC_SCHEDULE_MODIFIER_last:
2419   case OMPC_SCHEDULE_MODIFIER_unknown:
2420     break;
2421   }
2422   switch (M2) {
2423   case OMPC_SCHEDULE_MODIFIER_monotonic:
2424     return Schedule | OMP_sch_modifier_monotonic;
2425   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2426     return Schedule | OMP_sch_modifier_nonmonotonic;
2427   case OMPC_SCHEDULE_MODIFIER_simd:
2428   case OMPC_SCHEDULE_MODIFIER_last:
2429   case OMPC_SCHEDULE_MODIFIER_unknown:
2430     break;
2431   }
2432   return Schedule;
2433 }
2434 
2435 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2436                                           SourceLocation Loc,
2437                                           const OpenMPScheduleTy &ScheduleKind,
2438                                           unsigned IVSize, bool IVSigned,
2439                                           bool Ordered, llvm::Value *UB,
2440                                           llvm::Value *Chunk) {
2441   if (!CGF.HaveInsertPoint())
2442     return;
2443   OpenMPSchedType Schedule =
2444       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2445   assert(Ordered ||
2446          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2447           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
2448   // Call __kmpc_dispatch_init(
2449   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2450   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2451   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2452 
2453   // If the Chunk was not specified in the clause - use default value 1.
2454   if (Chunk == nullptr)
2455     Chunk = CGF.Builder.getIntN(IVSize, 1);
2456   llvm::Value *Args[] = {
2457       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2458       CGF.Builder.getInt32(addMonoNonMonoModifier(
2459           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2460       CGF.Builder.getIntN(IVSize, 0),                   // Lower
2461       UB,                                               // Upper
2462       CGF.Builder.getIntN(IVSize, 1),                   // Stride
2463       Chunk                                             // Chunk
2464   };
2465   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2466 }
2467 
2468 static void emitForStaticInitCall(
2469     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2470     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2471     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2472     unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2473     Address ST, llvm::Value *Chunk) {
2474   if (!CGF.HaveInsertPoint())
2475      return;
2476 
2477    assert(!Ordered);
2478    assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2479           Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2480           Schedule == OMP_dist_sch_static ||
2481           Schedule == OMP_dist_sch_static_chunked);
2482 
2483    // Call __kmpc_for_static_init(
2484    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2485    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2486    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2487    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2488    if (Chunk == nullptr) {
2489      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2490              Schedule == OMP_dist_sch_static) &&
2491             "expected static non-chunked schedule");
2492      // If the Chunk was not specified in the clause - use default value 1.
2493        Chunk = CGF.Builder.getIntN(IVSize, 1);
2494    } else {
2495      assert((Schedule == OMP_sch_static_chunked ||
2496              Schedule == OMP_ord_static_chunked ||
2497              Schedule == OMP_dist_sch_static_chunked) &&
2498             "expected static chunked schedule");
2499    }
2500    llvm::Value *Args[] = {
2501        UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2502                                      Schedule, M1, M2)), // Schedule type
2503        IL.getPointer(),                                  // &isLastIter
2504        LB.getPointer(),                                  // &LB
2505        UB.getPointer(),                                  // &UB
2506        ST.getPointer(),                                  // &Stride
2507        CGF.Builder.getIntN(IVSize, 1),                   // Incr
2508        Chunk                                             // Chunk
2509    };
2510    CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2511 }
2512 
2513 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2514                                         SourceLocation Loc,
2515                                         const OpenMPScheduleTy &ScheduleKind,
2516                                         unsigned IVSize, bool IVSigned,
2517                                         bool Ordered, Address IL, Address LB,
2518                                         Address UB, Address ST,
2519                                         llvm::Value *Chunk) {
2520   OpenMPSchedType ScheduleNum =
2521       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2522   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2523   auto *ThreadId = getThreadID(CGF, Loc);
2524   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2525   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2526                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
2527                         Ordered, IL, LB, UB, ST, Chunk);
2528 }
2529 
2530 void CGOpenMPRuntime::emitDistributeStaticInit(
2531     CodeGenFunction &CGF, SourceLocation Loc,
2532     OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
2533     bool Ordered, Address IL, Address LB, Address UB, Address ST,
2534     llvm::Value *Chunk) {
2535   OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2536   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2537   auto *ThreadId = getThreadID(CGF, Loc);
2538   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2539   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2540                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2541                         OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
2542                         UB, ST, Chunk);
2543 }
2544 
2545 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2546                                           SourceLocation Loc) {
2547   if (!CGF.HaveInsertPoint())
2548     return;
2549   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2550   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2551   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2552                       Args);
2553 }
2554 
2555 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2556                                                  SourceLocation Loc,
2557                                                  unsigned IVSize,
2558                                                  bool IVSigned) {
2559   if (!CGF.HaveInsertPoint())
2560     return;
2561   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2562   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2563   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2564 }
2565 
2566 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2567                                           SourceLocation Loc, unsigned IVSize,
2568                                           bool IVSigned, Address IL,
2569                                           Address LB, Address UB,
2570                                           Address ST) {
2571   // Call __kmpc_dispatch_next(
2572   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2573   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2574   //          kmp_int[32|64] *p_stride);
2575   llvm::Value *Args[] = {
2576       emitUpdateLocation(CGF, Loc),
2577       getThreadID(CGF, Loc),
2578       IL.getPointer(), // &isLastIter
2579       LB.getPointer(), // &Lower
2580       UB.getPointer(), // &Upper
2581       ST.getPointer()  // &Stride
2582   };
2583   llvm::Value *Call =
2584       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2585   return CGF.EmitScalarConversion(
2586       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2587       CGF.getContext().BoolTy, Loc);
2588 }
2589 
2590 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2591                                            llvm::Value *NumThreads,
2592                                            SourceLocation Loc) {
2593   if (!CGF.HaveInsertPoint())
2594     return;
2595   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2596   llvm::Value *Args[] = {
2597       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2598       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2599   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2600                       Args);
2601 }
2602 
2603 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2604                                          OpenMPProcBindClauseKind ProcBind,
2605                                          SourceLocation Loc) {
2606   if (!CGF.HaveInsertPoint())
2607     return;
2608   // Constants for proc bind value accepted by the runtime.
2609   enum ProcBindTy {
2610     ProcBindFalse = 0,
2611     ProcBindTrue,
2612     ProcBindMaster,
2613     ProcBindClose,
2614     ProcBindSpread,
2615     ProcBindIntel,
2616     ProcBindDefault
2617   } RuntimeProcBind;
2618   switch (ProcBind) {
2619   case OMPC_PROC_BIND_master:
2620     RuntimeProcBind = ProcBindMaster;
2621     break;
2622   case OMPC_PROC_BIND_close:
2623     RuntimeProcBind = ProcBindClose;
2624     break;
2625   case OMPC_PROC_BIND_spread:
2626     RuntimeProcBind = ProcBindSpread;
2627     break;
2628   case OMPC_PROC_BIND_unknown:
2629     llvm_unreachable("Unsupported proc_bind value.");
2630   }
2631   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2632   llvm::Value *Args[] = {
2633       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2634       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2635   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2636 }
2637 
2638 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2639                                 SourceLocation Loc) {
2640   if (!CGF.HaveInsertPoint())
2641     return;
2642   // Build call void __kmpc_flush(ident_t *loc)
2643   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2644                       emitUpdateLocation(CGF, Loc));
2645 }
2646 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they number the fields
/// consecutively from zero in declaration order; their order here is
/// therefore significant.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
  /// \brief (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// \brief (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// \brief (Taskloops only) Stride.
  KmpTaskTStride,
  /// \brief (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
};
} // anonymous namespace
2668 
2669 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2670   // FIXME: Add other entries type when they become supported.
2671   return OffloadEntriesTargetRegion.empty();
2672 }
2673 
2674 /// \brief Initialize target region entry.
2675 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2676     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2677                                     StringRef ParentName, unsigned LineNum,
2678                                     unsigned Order) {
2679   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2680                                              "only required for the device "
2681                                              "code generation.");
2682   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2683       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2684   ++OffloadingEntriesNum;
2685 }
2686 
2687 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2688     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2689                                   StringRef ParentName, unsigned LineNum,
2690                                   llvm::Constant *Addr, llvm::Constant *ID) {
2691   // If we are emitting code for a target, the entry is already initialized,
2692   // only has to be registered.
2693   if (CGM.getLangOpts().OpenMPIsDevice) {
2694     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2695            "Entry must exist.");
2696     auto &Entry =
2697         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2698     assert(Entry.isValid() && "Entry not initialized!");
2699     Entry.setAddress(Addr);
2700     Entry.setID(ID);
2701     return;
2702   } else {
2703     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2704     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2705   }
2706 }
2707 
2708 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2709     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2710     unsigned LineNum) const {
2711   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2712   if (PerDevice == OffloadEntriesTargetRegion.end())
2713     return false;
2714   auto PerFile = PerDevice->second.find(FileID);
2715   if (PerFile == PerDevice->second.end())
2716     return false;
2717   auto PerParentName = PerFile->second.find(ParentName);
2718   if (PerParentName == PerFile->second.end())
2719     return false;
2720   auto PerLine = PerParentName->second.find(LineNum);
2721   if (PerLine == PerParentName->second.end())
2722     return false;
2723   // Fail if this entry is already registered.
2724   if (PerLine->second.getAddress() || PerLine->second.getID())
2725     return false;
2726   return true;
2727 }
2728 
2729 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2730     const OffloadTargetRegionEntryInfoActTy &Action) {
2731   // Scan all target region entries and perform the provided action.
2732   for (auto &D : OffloadEntriesTargetRegion)
2733     for (auto &F : D.second)
2734       for (auto &P : F.second)
2735         for (auto &L : P.second)
2736           Action(D.first, F.first, P.first(), L.first, L.second);
2737 }
2738 
2739 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2740 /// \a Codegen. This is used to emit the two functions that register and
2741 /// unregister the descriptor of the current compilation unit.
2742 static llvm::Function *
2743 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2744                                          const RegionCodeGenTy &Codegen) {
2745   auto &C = CGM.getContext();
2746   FunctionArgList Args;
2747   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2748                              /*Id=*/nullptr, C.VoidPtrTy);
2749   Args.push_back(&DummyPtr);
2750 
2751   CodeGenFunction CGF(CGM);
2752   GlobalDecl();
2753   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2754   auto FTy = CGM.getTypes().GetFunctionType(FI);
2755   auto *Fn =
2756       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2757   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2758   Codegen(CGF);
2759   CGF.FinishFunction();
2760   return Fn;
2761 }
2762 
2763 llvm::Function *
2764 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2765 
2766   // If we don't have entries or if we are emitting code for the device, we
2767   // don't need to do anything.
2768   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2769     return nullptr;
2770 
2771   auto &M = CGM.getModule();
2772   auto &C = CGM.getContext();
2773 
2774   // Get list of devices we care about
2775   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2776 
2777   // We should be creating an offloading descriptor only if there are devices
2778   // specified.
2779   assert(!Devices.empty() && "No OpenMP offloading devices??");
2780 
2781   // Create the external variables that will point to the begin and end of the
2782   // host entries section. These will be defined by the linker.
2783   auto *OffloadEntryTy =
2784       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2785   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2786       M, OffloadEntryTy, /*isConstant=*/true,
2787       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2788       ".omp_offloading.entries_begin");
2789   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2790       M, OffloadEntryTy, /*isConstant=*/true,
2791       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2792       ".omp_offloading.entries_end");
2793 
2794   // Create all device images
2795   llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
2796   auto *DeviceImageTy = cast<llvm::StructType>(
2797       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2798 
2799   for (unsigned i = 0; i < Devices.size(); ++i) {
2800     StringRef T = Devices[i].getTriple();
2801     auto *ImgBegin = new llvm::GlobalVariable(
2802         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2803         /*Initializer=*/nullptr,
2804         Twine(".omp_offloading.img_start.") + Twine(T));
2805     auto *ImgEnd = new llvm::GlobalVariable(
2806         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2807         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2808 
2809     llvm::Constant *Dev =
2810         llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2811                                   HostEntriesBegin, HostEntriesEnd, nullptr);
2812     DeviceImagesEntires.push_back(Dev);
2813   }
2814 
2815   // Create device images global array.
2816   llvm::ArrayType *DeviceImagesInitTy =
2817       llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
2818   llvm::Constant *DeviceImagesInit =
2819       llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
2820 
2821   llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2822       M, DeviceImagesInitTy, /*isConstant=*/true,
2823       llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2824       ".omp_offloading.device_images");
2825   DeviceImages->setUnnamedAddr(true);
2826 
2827   // This is a Zero array to be used in the creation of the constant expressions
2828   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2829                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2830 
2831   // Create the target region descriptor.
2832   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2833       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2834   llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2835       BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2836       llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2837                                            Index),
2838       HostEntriesBegin, HostEntriesEnd, nullptr);
2839 
2840   auto *Desc = new llvm::GlobalVariable(
2841       M, BinaryDescriptorTy, /*isConstant=*/true,
2842       llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2843       ".omp_offloading.descriptor");
2844 
2845   // Emit code to register or unregister the descriptor at execution
2846   // startup or closing, respectively.
2847 
2848   // Create a variable to drive the registration and unregistration of the
2849   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2850   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2851   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2852                                 IdentInfo, C.CharTy);
2853 
2854   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2855       CGM, ".omp_offloading.descriptor_unreg",
2856       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2857         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2858                              Desc);
2859       });
2860   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2861       CGM, ".omp_offloading.descriptor_reg",
2862       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2863         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2864                              Desc);
2865         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2866       });
2867   return RegFn;
2868 }
2869 
2870 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2871                                          llvm::Constant *Addr, uint64_t Size) {
2872   StringRef Name = Addr->getName();
2873   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2874       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2875   llvm::LLVMContext &C = CGM.getModule().getContext();
2876   llvm::Module &M = CGM.getModule();
2877 
2878   // Make sure the address has the right type.
2879   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2880 
2881   // Create constant string with the name.
2882   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2883 
2884   llvm::GlobalVariable *Str =
2885       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2886                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2887                                ".omp_offloading.entry_name");
2888   Str->setUnnamedAddr(true);
2889   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2890 
2891   // Create the entry struct.
2892   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2893       TgtOffloadEntryType, AddrPtr, StrPtr,
2894       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2895   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2896       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2897       EntryInit, ".omp_offloading.entry");
2898 
2899   // The entry has to be created in the section the linker expects it to be.
2900   Entry->setSection(".omp_offloading.entries");
2901   // We can't have any padding between symbols, so we need to have 1-byte
2902   // alignment.
2903   Entry->setAlignment(1);
2904 }
2905 
2906 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2907   // Emit the offloading entries and metadata so that the device codegen side
2908   // can
2909   // easily figure out what to emit. The produced metadata looks like this:
2910   //
2911   // !omp_offload.info = !{!1, ...}
2912   //
2913   // Right now we only generate metadata for function that contain target
2914   // regions.
2915 
2916   // If we do not have entries, we dont need to do anything.
2917   if (OffloadEntriesInfoManager.empty())
2918     return;
2919 
2920   llvm::Module &M = CGM.getModule();
2921   llvm::LLVMContext &C = M.getContext();
2922   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2923       OrderedEntries(OffloadEntriesInfoManager.size());
2924 
2925   // Create the offloading info metadata node.
2926   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2927 
2928   // Auxiliar methods to create metadata values and strings.
2929   auto getMDInt = [&](unsigned v) {
2930     return llvm::ConstantAsMetadata::get(
2931         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2932   };
2933 
2934   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2935 
2936   // Create function that emits metadata for each target region entry;
2937   auto &&TargetRegionMetadataEmitter = [&](
2938       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2939       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2940     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2941     // Generate metadata for target regions. Each entry of this metadata
2942     // contains:
2943     // - Entry 0 -> Kind of this type of metadata (0).
2944     // - Entry 1 -> Device ID of the file where the entry was identified.
2945     // - Entry 2 -> File ID of the file where the entry was identified.
2946     // - Entry 3 -> Mangled name of the function where the entry was identified.
2947     // - Entry 4 -> Line in the file where the entry was identified.
2948     // - Entry 5 -> Order the entry was created.
2949     // The first element of the metadata node is the kind.
2950     Ops.push_back(getMDInt(E.getKind()));
2951     Ops.push_back(getMDInt(DeviceID));
2952     Ops.push_back(getMDInt(FileID));
2953     Ops.push_back(getMDString(ParentName));
2954     Ops.push_back(getMDInt(Line));
2955     Ops.push_back(getMDInt(E.getOrder()));
2956 
2957     // Save this entry in the right position of the ordered entries array.
2958     OrderedEntries[E.getOrder()] = &E;
2959 
2960     // Add metadata to the named metadata node.
2961     MD->addOperand(llvm::MDNode::get(C, Ops));
2962   };
2963 
2964   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2965       TargetRegionMetadataEmitter);
2966 
2967   for (auto *E : OrderedEntries) {
2968     assert(E && "All ordered entries must exist!");
2969     if (auto *CE =
2970             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2971                 E)) {
2972       assert(CE->getID() && CE->getAddress() &&
2973              "Entry ID and Addr are invalid!");
2974       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
2975     } else
2976       llvm_unreachable("Unsupported entry kind.");
2977   }
2978 }
2979 
2980 /// \brief Loads all the offload entries information from the host IR
2981 /// metadata.
2982 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
2983   // If we are in target mode, load the metadata from the host IR. This code has
2984   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
2985 
2986   if (!CGM.getLangOpts().OpenMPIsDevice)
2987     return;
2988 
2989   if (CGM.getLangOpts().OMPHostIRFile.empty())
2990     return;
2991 
2992   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
2993   if (Buf.getError())
2994     return;
2995 
2996   llvm::LLVMContext C;
2997   auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
2998 
2999   if (ME.getError())
3000     return;
3001 
3002   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3003   if (!MD)
3004     return;
3005 
3006   for (auto I : MD->operands()) {
3007     llvm::MDNode *MN = cast<llvm::MDNode>(I);
3008 
3009     auto getMDInt = [&](unsigned Idx) {
3010       llvm::ConstantAsMetadata *V =
3011           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3012       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3013     };
3014 
3015     auto getMDString = [&](unsigned Idx) {
3016       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3017       return V->getString();
3018     };
3019 
3020     switch (getMDInt(0)) {
3021     default:
3022       llvm_unreachable("Unexpected metadata!");
3023       break;
3024     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3025         OFFLOAD_ENTRY_INFO_TARGET_REGION:
3026       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3027           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3028           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3029           /*Order=*/getMDInt(5));
3030       break;
3031     }
3032   }
3033 }
3034 
3035 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3036   if (!KmpRoutineEntryPtrTy) {
3037     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3038     auto &C = CGM.getContext();
3039     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3040     FunctionProtoType::ExtProtoInfo EPI;
3041     KmpRoutineEntryPtrQTy = C.getPointerType(
3042         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3043     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3044   }
3045 }
3046 
3047 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3048                                        QualType FieldTy) {
3049   auto *Field = FieldDecl::Create(
3050       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3051       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3052       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3053   Field->setAccess(AS_public);
3054   DC->addDecl(Field);
3055   return Field;
3056 }
3057 
3058 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3059 
3060   // Make sure the type of the entry is already created. This is the type we
3061   // have to create:
3062   // struct __tgt_offload_entry{
3063   //   void      *addr;       // Pointer to the offload entry info.
3064   //                          // (function or global)
3065   //   char      *name;       // Name of the function or global.
3066   //   size_t     size;       // Size of the entry info (0 if it a function).
3067   // };
3068   if (TgtOffloadEntryQTy.isNull()) {
3069     ASTContext &C = CGM.getContext();
3070     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3071     RD->startDefinition();
3072     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3073     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3074     addFieldToRecordDecl(C, RD, C.getSizeType());
3075     RD->completeDefinition();
3076     TgtOffloadEntryQTy = C.getRecordType(RD);
3077   }
3078   return TgtOffloadEntryQTy;
3079 }
3080 
3081 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3082   // These are the types we need to build:
3083   // struct __tgt_device_image{
3084   // void   *ImageStart;       // Pointer to the target code start.
3085   // void   *ImageEnd;         // Pointer to the target code end.
3086   // // We also add the host entries to the device image, as it may be useful
3087   // // for the target runtime to have access to that information.
3088   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3089   //                                       // the entries.
3090   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3091   //                                       // entries (non inclusive).
3092   // };
3093   if (TgtDeviceImageQTy.isNull()) {
3094     ASTContext &C = CGM.getContext();
3095     auto *RD = C.buildImplicitRecord("__tgt_device_image");
3096     RD->startDefinition();
3097     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3098     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3099     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3100     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3101     RD->completeDefinition();
3102     TgtDeviceImageQTy = C.getRecordType(RD);
3103   }
3104   return TgtDeviceImageQTy;
3105 }
3106 
3107 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3108   // struct __tgt_bin_desc{
3109   //   int32_t              NumDevices;      // Number of devices supported.
3110   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
3111   //                                         // (one per device).
3112   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3113   //                                         // entries.
3114   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3115   //                                         // entries (non inclusive).
3116   // };
3117   if (TgtBinaryDescriptorQTy.isNull()) {
3118     ASTContext &C = CGM.getContext();
3119     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3120     RD->startDefinition();
3121     addFieldToRecordDecl(
3122         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3123     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3124     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3125     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3126     RD->completeDefinition();
3127     TgtBinaryDescriptorQTy = C.getRecordType(RD);
3128   }
3129   return TgtBinaryDescriptorQTy;
3130 }
3131 
3132 namespace {
3133 struct PrivateHelpersTy {
3134   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3135                    const VarDecl *PrivateElemInit)
3136       : Original(Original), PrivateCopy(PrivateCopy),
3137         PrivateElemInit(PrivateElemInit) {}
3138   const VarDecl *Original;
3139   const VarDecl *PrivateCopy;
3140   const VarDecl *PrivateElemInit;
3141 };
3142 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3143 } // anonymous namespace
3144 
3145 static RecordDecl *
3146 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3147   if (!Privates.empty()) {
3148     auto &C = CGM.getContext();
3149     // Build struct .kmp_privates_t. {
3150     //         /*  private vars  */
3151     //       };
3152     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3153     RD->startDefinition();
3154     for (auto &&Pair : Privates) {
3155       auto *VD = Pair.second.Original;
3156       auto Type = VD->getType();
3157       Type = Type.getNonReferenceType();
3158       auto *FD = addFieldToRecordDecl(C, RD, Type);
3159       if (VD->hasAttrs()) {
3160         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3161              E(VD->getAttrs().end());
3162              I != E; ++I)
3163           FD->addAttr(*I);
3164       }
3165     }
3166     RD->completeDefinition();
3167     return RD;
3168   }
3169   return nullptr;
3170 }
3171 
3172 static RecordDecl *
3173 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3174                          QualType KmpInt32Ty,
3175                          QualType KmpRoutineEntryPointerQTy) {
3176   auto &C = CGM.getContext();
3177   // Build struct kmp_task_t {
3178   //         void *              shareds;
3179   //         kmp_routine_entry_t routine;
3180   //         kmp_int32           part_id;
3181   //         kmp_routine_entry_t destructors;
3182   // For taskloops additional fields:
3183   //         kmp_uint64          lb;
3184   //         kmp_uint64          ub;
3185   //         kmp_int64           st;
3186   //         kmp_int32           liter;
3187   //       };
3188   auto *RD = C.buildImplicitRecord("kmp_task_t");
3189   RD->startDefinition();
3190   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3191   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3192   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3193   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3194   if (isOpenMPTaskLoopDirective(Kind)) {
3195     QualType KmpUInt64Ty =
3196         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3197     QualType KmpInt64Ty =
3198         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3199     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3200     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3201     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3202     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3203   }
3204   RD->completeDefinition();
3205   return RD;
3206 }
3207 
3208 static RecordDecl *
3209 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3210                                      ArrayRef<PrivateDataTy> Privates) {
3211   auto &C = CGM.getContext();
3212   // Build struct kmp_task_t_with_privates {
3213   //         kmp_task_t task_data;
3214   //         .kmp_privates_t. privates;
3215   //       };
3216   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3217   RD->startDefinition();
3218   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3219   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3220     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3221   }
3222   RD->completeDefinition();
3223   return RD;
3224 }
3225 
3226 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3227 /// argument.
3228 /// \code
3229 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3230 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3231 ///   For taskloops:
3232 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3233 ///   tt->shareds);
3234 ///   return 0;
3235 /// }
3236 /// \endcode
3237 static llvm::Value *
3238 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3239                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3240                       QualType KmpTaskTWithPrivatesPtrQTy,
3241                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3242                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
3243                       llvm::Value *TaskPrivatesMap) {
3244   auto &C = CGM.getContext();
3245   FunctionArgList Args;
3246   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3247   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3248                                 /*Id=*/nullptr,
3249                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3250   Args.push_back(&GtidArg);
3251   Args.push_back(&TaskTypeArg);
3252   auto &TaskEntryFnInfo =
3253       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3254   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3255   auto *TaskEntry =
3256       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
3257                              ".omp_task_entry.", &CGM.getModule());
3258   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3259   CodeGenFunction CGF(CGM);
3260   CGF.disableDebugInfo();
3261   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3262 
3263   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3264   // tt,
3265   // For taskloops:
3266   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3267   // tt->task_data.shareds);
3268   auto *GtidParam = CGF.EmitLoadOfScalar(
3269       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3270   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3271       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3272       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3273   auto *KmpTaskTWithPrivatesQTyRD =
3274       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3275   LValue Base =
3276       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3277   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3278   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3279   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3280   auto *PartidParam = PartIdLVal.getPointer();
3281 
3282   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3283   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3284   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3285       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3286       CGF.ConvertTypeForMem(SharedsPtrTy));
3287 
3288   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3289   llvm::Value *PrivatesParam;
3290   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3291     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3292     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3293         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3294   } else
3295     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3296 
3297   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3298                                TaskPrivatesMap,
3299                                CGF.Builder
3300                                    .CreatePointerBitCastOrAddrSpaceCast(
3301                                        TDBase.getAddress(), CGF.VoidPtrTy)
3302                                    .getPointer()};
3303   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3304                                           std::end(CommonArgs));
3305   if (isOpenMPTaskLoopDirective(Kind)) {
3306     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3307     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3308     auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3309     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3310     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3311     auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3312     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3313     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3314     auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3315     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3316     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3317     auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3318     CallArgs.push_back(LBParam);
3319     CallArgs.push_back(UBParam);
3320     CallArgs.push_back(StParam);
3321     CallArgs.push_back(LIParam);
3322   }
3323   CallArgs.push_back(SharedsParam);
3324 
3325   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3326   CGF.EmitStoreThroughLValue(
3327       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3328       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3329   CGF.FinishFunction();
3330   return TaskEntry;
3331 }
3332 
3333 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3334                                             SourceLocation Loc,
3335                                             QualType KmpInt32Ty,
3336                                             QualType KmpTaskTWithPrivatesPtrQTy,
3337                                             QualType KmpTaskTWithPrivatesQTy) {
3338   auto &C = CGM.getContext();
3339   FunctionArgList Args;
3340   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3341   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3342                                 /*Id=*/nullptr,
3343                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3344   Args.push_back(&GtidArg);
3345   Args.push_back(&TaskTypeArg);
3346   FunctionType::ExtInfo Info;
3347   auto &DestructorFnInfo =
3348       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3349   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3350   auto *DestructorFn =
3351       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3352                              ".omp_task_destructor.", &CGM.getModule());
3353   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3354                                     DestructorFnInfo);
3355   CodeGenFunction CGF(CGM);
3356   CGF.disableDebugInfo();
3357   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3358                     Args);
3359 
3360   LValue Base = CGF.EmitLoadOfPointerLValue(
3361       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3362       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3363   auto *KmpTaskTWithPrivatesQTyRD =
3364       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3365   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3366   Base = CGF.EmitLValueForField(Base, *FI);
3367   for (auto *Field :
3368        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3369     if (auto DtorKind = Field->getType().isDestructedType()) {
3370       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3371       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3372     }
3373   }
3374   CGF.FinishFunction();
3375   return DestructorFn;
3376 }
3377 
3378 /// \brief Emit a privates mapping function for correct handling of private and
3379 /// firstprivate variables.
3380 /// \code
3381 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3382 /// **noalias priv1,...,  <tyn> **noalias privn) {
3383 ///   *priv1 = &.privates.priv1;
3384 ///   ...;
3385 ///   *privn = &.privates.privn;
3386 /// }
3387 /// \endcode
3388 static llvm::Value *
3389 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3390                                ArrayRef<const Expr *> PrivateVars,
3391                                ArrayRef<const Expr *> FirstprivateVars,
3392                                ArrayRef<const Expr *> LastprivateVars,
3393                                QualType PrivatesQTy,
3394                                ArrayRef<PrivateDataTy> Privates) {
3395   auto &C = CGM.getContext();
3396   FunctionArgList Args;
3397   ImplicitParamDecl TaskPrivatesArg(
3398       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3399       C.getPointerType(PrivatesQTy).withConst().withRestrict());
3400   Args.push_back(&TaskPrivatesArg);
3401   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3402   unsigned Counter = 1;
3403   for (auto *E: PrivateVars) {
3404     Args.push_back(ImplicitParamDecl::Create(
3405         C, /*DC=*/nullptr, Loc,
3406         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3407                             .withConst()
3408                             .withRestrict()));
3409     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3410     PrivateVarsPos[VD] = Counter;
3411     ++Counter;
3412   }
3413   for (auto *E : FirstprivateVars) {
3414     Args.push_back(ImplicitParamDecl::Create(
3415         C, /*DC=*/nullptr, Loc,
3416         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3417                             .withConst()
3418                             .withRestrict()));
3419     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3420     PrivateVarsPos[VD] = Counter;
3421     ++Counter;
3422   }
3423   for (auto *E: LastprivateVars) {
3424     Args.push_back(ImplicitParamDecl::Create(
3425         C, /*DC=*/nullptr, Loc,
3426         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3427                             .withConst()
3428                             .withRestrict()));
3429     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3430     PrivateVarsPos[VD] = Counter;
3431     ++Counter;
3432   }
3433   auto &TaskPrivatesMapFnInfo =
3434       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3435   auto *TaskPrivatesMapTy =
3436       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3437   auto *TaskPrivatesMap = llvm::Function::Create(
3438       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3439       ".omp_task_privates_map.", &CGM.getModule());
3440   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3441                                     TaskPrivatesMapFnInfo);
3442   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3443   CodeGenFunction CGF(CGM);
3444   CGF.disableDebugInfo();
3445   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3446                     TaskPrivatesMapFnInfo, Args);
3447 
3448   // *privi = &.privates.privi;
3449   LValue Base = CGF.EmitLoadOfPointerLValue(
3450       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3451       TaskPrivatesArg.getType()->castAs<PointerType>());
3452   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3453   Counter = 0;
3454   for (auto *Field : PrivatesQTyRD->fields()) {
3455     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3456     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3457     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3458     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3459         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3460     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3461     ++Counter;
3462   }
3463   CGF.FinishFunction();
3464   return TaskPrivatesMap;
3465 }
3466 
3467 static int array_pod_sort_comparator(const PrivateDataTy *P1,
3468                                      const PrivateDataTy *P2) {
3469   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3470 }
3471 
3472 /// Emit initialization for private variables in task-based directives.
3473 static void emitPrivatesInit(CodeGenFunction &CGF,
3474                              const OMPExecutableDirective &D,
3475                              Address KmpTaskSharedsPtr, LValue TDBase,
3476                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3477                              QualType SharedsTy, QualType SharedsPtrTy,
3478                              const OMPTaskDataTy &Data,
3479                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3480   auto &C = CGF.getContext();
3481   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3482   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3483   LValue SrcBase;
3484   if (!Data.FirstprivateVars.empty()) {
3485     SrcBase = CGF.MakeAddrLValue(
3486         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3487             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3488         SharedsTy);
3489   }
3490   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
3491       cast<CapturedStmt>(*D.getAssociatedStmt()));
3492   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3493   for (auto &&Pair : Privates) {
3494     auto *VD = Pair.second.PrivateCopy;
3495     auto *Init = VD->getAnyInitializer();
3496     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3497                              !CGF.isTrivialInitializer(Init)))) {
3498       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3499       if (auto *Elem = Pair.second.PrivateElemInit) {
3500         auto *OriginalVD = Pair.second.Original;
3501         auto *SharedField = CapturesInfo.lookup(OriginalVD);
3502         auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3503         SharedRefLValue = CGF.MakeAddrLValue(
3504             Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
3505             SharedRefLValue.getType(), AlignmentSource::Decl);
3506         QualType Type = OriginalVD->getType();
3507         if (Type->isArrayType()) {
3508           // Initialize firstprivate array.
3509           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3510             // Perform simple memcpy.
3511             CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
3512                                     SharedRefLValue.getAddress(), Type);
3513           } else {
3514             // Initialize firstprivate array using element-by-element
3515             // intialization.
3516             CGF.EmitOMPAggregateAssign(
3517                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3518                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3519                                                   Address SrcElement) {
3520                   // Clean up any temporaries needed by the initialization.
3521                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3522                   InitScope.addPrivate(
3523                       Elem, [SrcElement]() -> Address { return SrcElement; });
3524                   (void)InitScope.Privatize();
3525                   // Emit initialization for single element.
3526                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3527                       CGF, &CapturesInfo);
3528                   CGF.EmitAnyExprToMem(Init, DestElement,
3529                                        Init->getType().getQualifiers(),
3530                                        /*IsInitializer=*/false);
3531                 });
3532           }
3533         } else {
3534           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3535           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
3536             return SharedRefLValue.getAddress();
3537           });
3538           (void)InitScope.Privatize();
3539           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3540           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3541                              /*capturedByInit=*/false);
3542         }
3543       } else
3544         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3545     }
3546     ++FI;
3547   }
3548 }
3549 
3550 /// Check if duplication function is required for taskloops.
3551 static bool checkInitIsRequired(CodeGenFunction &CGF,
3552                                 ArrayRef<PrivateDataTy> Privates) {
3553   bool InitRequired = false;
3554   for (auto &&Pair : Privates) {
3555     auto *VD = Pair.second.PrivateCopy;
3556     auto *Init = VD->getAnyInitializer();
3557     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3558                                     !CGF.isTrivialInitializer(Init));
3559   }
3560   return InitRequired;
3561 }
3562 
3563 
3564 /// Emit task_dup function (for initialization of
3565 /// private/firstprivate/lastprivate vars and last_iter flag)
3566 /// \code
3567 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3568 /// lastpriv) {
3569 /// // setup lastprivate flag
3570 ///    task_dst->last = lastpriv;
3571 /// // could be constructor calls here...
3572 /// }
3573 /// \endcode
3574 static llvm::Value *
3575 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3576                     const OMPExecutableDirective &D,
3577                     QualType KmpTaskTWithPrivatesPtrQTy,
3578                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3579                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3580                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3581                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3582   auto &C = CGM.getContext();
3583   FunctionArgList Args;
3584   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
3585                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3586   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
3587                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3588   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
3589                                 /*Id=*/nullptr, C.IntTy);
3590   Args.push_back(&DstArg);
3591   Args.push_back(&SrcArg);
3592   Args.push_back(&LastprivArg);
3593   auto &TaskDupFnInfo =
3594       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3595   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3596   auto *TaskDup =
3597       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
3598                              ".omp_task_dup.", &CGM.getModule());
3599   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
3600   CodeGenFunction CGF(CGM);
3601   CGF.disableDebugInfo();
3602   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
3603 
3604   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3605       CGF.GetAddrOfLocalVar(&DstArg),
3606       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3607   // task_dst->liter = lastpriv;
3608   if (WithLastIter) {
3609     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3610     LValue Base = CGF.EmitLValueForField(
3611         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3612     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3613     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3614         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3615     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3616   }
3617 
3618   // Emit initial values for private copies (if any).
3619   assert(!Privates.empty());
3620   Address KmpTaskSharedsPtr = Address::invalid();
3621   if (!Data.FirstprivateVars.empty()) {
3622     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3623         CGF.GetAddrOfLocalVar(&SrcArg),
3624         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3625     LValue Base = CGF.EmitLValueForField(
3626         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3627     KmpTaskSharedsPtr = Address(
3628         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3629                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3630                                                   KmpTaskTShareds)),
3631                              Loc),
3632         CGF.getNaturalTypeAlignment(SharedsTy));
3633   }
3634   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3635                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3636   CGF.FinishFunction();
3637   return TaskDup;
3638 }
3639 
3640 /// Checks if destructor function is required to be generated.
3641 /// \return true if cleanups are required, false otherwise.
3642 static bool
3643 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3644   bool NeedsCleanup = false;
3645   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3646   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3647   for (auto *FD : PrivateRD->fields()) {
3648     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3649     if (NeedsCleanup)
3650       break;
3651   }
3652   return NeedsCleanup;
3653 }
3654 
/// Allocate and initialize the runtime task object for a task-based
/// directive.
///
/// The steps, in emission order, are:
///  - gather the private, firstprivate and lastprivate variables into one
///    list keyed by declaration alignment and sort it with
///    array_pod_sort_comparator;
///  - build (or reuse) the kmp_task_t record type and the per-task record that
///    additionally holds the privates;
///  - emit the privates mapping function and the proxy task entry;
///  - call __kmpc_omp_task_alloc and copy the shareds into the new task;
///  - initialize the private copies, emit a task duplication function for
///    taskloop directives when needed, and store the destructors function (or
///    a null pointer) into the task record.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Source location used for the runtime calls and loads.
/// \param D Directive being lowered.
/// \param TaskFunction Outlined task function. It is cast to llvm::Function
/// and the type of its fourth argument is used as the type of the privates
/// mapping function pointer.
/// \param SharedsTy Record type of the shared captures.
/// \param Shareds Address of the shareds record that is copied into the task.
/// \param Data Clause data (privates, tied/final/priority flags, ...).
/// \return The allocated task, the proxy entry, the task pointer cast to the
/// per-task record type, the kmp_task_t lvalue, its record declaration and,
/// when one was emitted, the task duplication function.
///
/// NOTE(review): KmpTaskTQTy is built only when it is still null, using the
/// kind of the directive that gets here first. If createKmpTaskTRecordDecl
/// (not visible here) lays the record out differently per directive kind,
/// later directives of another kind would reuse the first layout -- confirm.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // Firstprivates additionally carry the helper variable used by their
  // initializer (taken from Data.FirstprivateInits).
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The expected type of the mapping function is the type of the fourth
  // argument of the outlined task function.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final flag is either selected at run time (when Data.Final carries a
  // value) or folded in as a constant.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the returned task through the per-task record type.
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase is the kmp_task_t part, i.e. the first field of the task record.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A duplication function is emitted only for taskloop directives, and only
    // if there are lastprivates or some private needs non-trivial
    // construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Provide pointer to function with destructors for privates.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
3827 
/// Emit the code that creates and launches a task.
///
/// Does nothing when \p CGF has no insertion point. Otherwise the task is
/// allocated and initialized through emitTaskInit, the dependence list (if
/// any) is materialized as an array of kmp_depend_info records, and one of two
/// code paths is emitted:
///  - "then": hand the task to the runtime with __kmpc_omp_task, or with
///    __kmpc_omp_task_with_deps when there are dependences;
///  - "else": wait for the dependences with __kmpc_omp_wait_deps (if any) and
///    call the proxy task entry directly, with __kmpc_omp_task_begin_if0 /
///    __kmpc_omp_task_complete_if0 supplied as the region's action.
/// With an \p IfCond both paths are emitted under emitOMPIfClause; without
/// one only the "then" path is emitted.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Source location of the directive.
/// \param D Directive being lowered.
/// \param TaskFunction Outlined task function.
/// \param SharedsTy Record type of the shared captures.
/// \param Shareds Address of the shareds record.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param Data Clause data, including the dependence list.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Field indices of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as bool.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build kmp_depend_info { intptr_t; size_t; flags } once and cache it.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Data.Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is the distance from the address of the
        // section to one element past the address obtained with
        // LowerBound=false.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // The runtime calls below receive the array as a void pointer to its
    // first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled in only when there are dependences; the noalias list is always
  // passed as empty (count 0, null pointer).
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: enqueue the task with the runtime. The argument arrays are
  // captured by reference, so this must be invoked before they go out of
  // scope.
  auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
                        NumDependencies, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks store 0 into the part_id field before the task is
    // handed over.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps; same layout as DepTaskArgs but
  // without the task pointer.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Else" path: call the task entry directly instead of enqueueing the task.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
                                                           PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
        CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    // NOTE(review): CommonActionTy presumably emits the first call on entry
    // and the second on exit of the region -- its definition is not visible
    // here.
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause both paths are emitted under the condition; without
  // one only the "then" path is emitted.
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4017 
4018 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4019                                        const OMPLoopDirective &D,
4020                                        llvm::Value *TaskFunction,
4021                                        QualType SharedsTy, Address Shareds,
4022                                        const Expr *IfCond,
4023                                        const OMPTaskDataTy &Data) {
4024   if (!CGF.HaveInsertPoint())
4025     return;
4026   TaskResultTy Result =
4027       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4028   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
4029   // libcall.
4030   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4031   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4032   // sched, kmp_uint64 grainsize, void *task_dup);
4033   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4034   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4035   llvm::Value *IfVal;
4036   if (IfCond) {
4037     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4038                                       /*isSigned=*/true);
4039   } else
4040     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4041 
4042   LValue LBLVal = CGF.EmitLValueForField(
4043       Result.TDBase,
4044       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4045   auto *LBVar =
4046       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4047   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4048                        /*IsInitializer=*/true);
4049   LValue UBLVal = CGF.EmitLValueForField(
4050       Result.TDBase,
4051       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4052   auto *UBVar =
4053       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4054   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4055                        /*IsInitializer=*/true);
4056   LValue StLVal = CGF.EmitLValueForField(
4057       Result.TDBase,
4058       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4059   auto *StVar =
4060       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4061   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4062                        /*IsInitializer=*/true);
4063   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4064   llvm::Value *TaskArgs[] = {
4065       UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
4066       UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4067       llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
4068       llvm::ConstantInt::getSigned(
4069           CGF.IntTy, Data.Schedule.getPointer()
4070                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4071                          : NoSchedule),
4072       Data.Schedule.getPointer()
4073           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4074                                       /*isSigned=*/false)
4075           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4076       Result.TaskDupFn
4077           ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
4078                                                             CGF.VoidPtrTy)
4079           : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4080   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4081 }
4082 
4083 /// \brief Emit reduction operation for each element of array (required for
4084 /// array sections) LHS op = RHS.
4085 /// \param Type Type of array.
4086 /// \param LHSVar Variable on the left side of the reduction operation
4087 /// (references element of array in original variable).
4088 /// \param RHSVar Variable on the right side of the reduction operation
4089 /// (references element of array in original variable).
4090 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4091 /// RHSVar.
4092 static void EmitOMPAggregateReduction(
4093     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4094     const VarDecl *RHSVar,
4095     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4096                                   const Expr *, const Expr *)> &RedOpGen,
4097     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4098     const Expr *UpExpr = nullptr) {
4099   // Perform element-by-element initialization.
4100   QualType ElementTy;
4101   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4102   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4103 
4104   // Drill down to the base element type on both arrays.
4105   auto ArrayTy = Type->getAsArrayTypeUnsafe();
4106   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4107 
4108   auto RHSBegin = RHSAddr.getPointer();
4109   auto LHSBegin = LHSAddr.getPointer();
4110   // Cast from pointer to array type to pointer to single element.
4111   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4112   // The basic structure here is a while-do loop.
4113   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4114   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4115   auto IsEmpty =
4116       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4117   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4118 
4119   // Enter the loop body, making that address the current address.
4120   auto EntryBB = CGF.Builder.GetInsertBlock();
4121   CGF.EmitBlock(BodyBB);
4122 
4123   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4124 
4125   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4126       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4127   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4128   Address RHSElementCurrent =
4129       Address(RHSElementPHI,
4130               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4131 
4132   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4133       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4134   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4135   Address LHSElementCurrent =
4136       Address(LHSElementPHI,
4137               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4138 
4139   // Emit copy.
4140   CodeGenFunction::OMPPrivateScope Scope(CGF);
4141   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4142   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4143   Scope.Privatize();
4144   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4145   Scope.ForceCleanup();
4146 
4147   // Shift the address forward by one element.
4148   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4149       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4150   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4151       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4152   // Check whether we've reached the end.
4153   auto Done =
4154       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4155   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4156   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4157   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4158 
4159   // Done.
4160   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4161 }
4162 
4163 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4164 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4165 /// UDR combiner function.
4166 static void emitReductionCombiner(CodeGenFunction &CGF,
4167                                   const Expr *ReductionOp) {
4168   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4169     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4170       if (auto *DRE =
4171               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4172         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4173           std::pair<llvm::Function *, llvm::Function *> Reduction =
4174               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4175           RValue Func = RValue::get(Reduction.first);
4176           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4177           CGF.EmitIgnoredExpr(ReductionOp);
4178           return;
4179         }
4180   CGF.EmitIgnoredExpr(ReductionOp);
4181 }
4182 
4183 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
4184                                           llvm::Type *ArgsType,
4185                                           ArrayRef<const Expr *> Privates,
4186                                           ArrayRef<const Expr *> LHSExprs,
4187                                           ArrayRef<const Expr *> RHSExprs,
4188                                           ArrayRef<const Expr *> ReductionOps) {
4189   auto &C = CGM.getContext();
4190 
4191   // void reduction_func(void *LHSArg, void *RHSArg);
4192   FunctionArgList Args;
4193   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4194                            C.VoidPtrTy);
4195   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4196                            C.VoidPtrTy);
4197   Args.push_back(&LHSArg);
4198   Args.push_back(&RHSArg);
4199   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4200   auto *Fn = llvm::Function::Create(
4201       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4202       ".omp.reduction.reduction_func", &CGM.getModule());
4203   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4204   CodeGenFunction CGF(CGM);
4205   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4206 
4207   // Dst = (void*[n])(LHSArg);
4208   // Src = (void*[n])(RHSArg);
4209   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4210       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4211       ArgsType), CGF.getPointerAlign());
4212   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4213       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4214       ArgsType), CGF.getPointerAlign());
4215 
4216   //  ...
4217   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4218   //  ...
4219   CodeGenFunction::OMPPrivateScope Scope(CGF);
4220   auto IPriv = Privates.begin();
4221   unsigned Idx = 0;
4222   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4223     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4224     Scope.addPrivate(RHSVar, [&]() -> Address {
4225       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4226     });
4227     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4228     Scope.addPrivate(LHSVar, [&]() -> Address {
4229       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4230     });
4231     QualType PrivTy = (*IPriv)->getType();
4232     if (PrivTy->isVariablyModifiedType()) {
4233       // Get array size and emit VLA type.
4234       ++Idx;
4235       Address Elem =
4236           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4237       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4238       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4239       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4240       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4241           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4242       CGF.EmitVariablyModifiedType(PrivTy);
4243     }
4244   }
4245   Scope.Privatize();
4246   IPriv = Privates.begin();
4247   auto ILHS = LHSExprs.begin();
4248   auto IRHS = RHSExprs.begin();
4249   for (auto *E : ReductionOps) {
4250     if ((*IPriv)->getType()->isArrayType()) {
4251       // Emit reduction for array section.
4252       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4253       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4254       EmitOMPAggregateReduction(
4255           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4256           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4257             emitReductionCombiner(CGF, E);
4258           });
4259     } else
4260       // Emit reduction for array subscript or single variable.
4261       emitReductionCombiner(CGF, E);
4262     ++IPriv;
4263     ++ILHS;
4264     ++IRHS;
4265   }
4266   Scope.ForceCleanup();
4267   CGF.FinishFunction();
4268   return Fn;
4269 }
4270 
4271 static void emitSingleReductionCombiner(CodeGenFunction &CGF,
4272                                         const Expr *ReductionOp,
4273                                         const Expr *PrivateRef,
4274                                         const DeclRefExpr *LHS,
4275                                         const DeclRefExpr *RHS) {
4276   if (PrivateRef->getType()->isArrayType()) {
4277     // Emit reduction for array section.
4278     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4279     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4280     EmitOMPAggregateReduction(
4281         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4282         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4283           emitReductionCombiner(CGF, ReductionOp);
4284         });
4285   } else
4286     // Emit reduction for array subscript or single variable.
4287     emitReductionCombiner(CGF, ReductionOp);
4288 }
4289 
// Emit the final combination step of a reduction. Privates, LHSExprs,
// RHSExprs and ReductionOps are traversed in lock-step, so entry i of each
// describes the same reduction item. With SimpleReduction only the combiners
// are emitted; otherwise the combiners are wrapped in the
// __kmpc_reduce{_nowait} protocol sketched below. WithNowait selects the
// '_nowait' flavour of the runtime entry points.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  if (!CGF.HaveInsertPoint())
    return;
  // The following code is emitted for a reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls: just emit every combiner in order.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Each variably modified private needs one additional slot, so the array
  // may be longer than the number of reduction items.
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances faster than I when size slots are
  // inserted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Store the address of the RHS expression, cast to void *.
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      // NOTE: this local 'Size' shadows the slot count computed above. The
      // size is stored in the pointer-typed slot via inttoptr.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .first,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL =
    CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
                                                    CGF.VoidPtrTy);
  // NOTE(review): <n> is the number of reduction items (RHSExprs.size()),
  // whereas RedList may contain extra slots for VLA sizes - confirm that the
  // runtime expects the item count here.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Any result other than 1 or 2 falls through to the default block, which is
  // also where both cases continue.
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the plain (non-atomic) combiners.
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter call; the matching end-reduce call is registered as the exit
  // action of the region.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Body of case 2: every combiner is emitted either as an atomic update or,
  // when it is not of the form 'x = <expr>', inside a critical region.
  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      // Decompose 'x = <update>' into its parts. XExpr stays null when the
      // reduction operation is not a simple assignment.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Opcode of the update's binary operator; BO_Comma is the initial value
      // kept when no binary operator is found in the update.
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer 'BO' declared in the condition shadows the opcode
      // variable above inside this if statement only.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits one atomic update of X. The callback handed to
        // EmitOMPAtomicSimpleUpdateExpr evaluates UpExpr with VD rebound to a
        // temporary that holds the value passed to the callback.
        auto &&AtomicRedGen = [BO, VD, IPriv,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // NOTE: this array shadows the function-level EndArgs and holds the same
    // three values.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    // With nowait no end-reduce call is attached to the atomic case.
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
4582 
4583 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4584                                        SourceLocation Loc) {
4585   if (!CGF.HaveInsertPoint())
4586     return;
4587   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4588   // global_tid);
4589   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4590   // Ignore return result until untied tasks are supported.
4591   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4592   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4593     Region->emitUntiedSwitch(CGF);
4594 }
4595 
4596 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4597                                            OpenMPDirectiveKind InnerKind,
4598                                            const RegionCodeGenTy &CodeGen,
4599                                            bool HasCancel) {
4600   if (!CGF.HaveInsertPoint())
4601     return;
4602   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4603   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4604 }
4605 
namespace {
/// Integer codes passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls emitted in this
/// file. The explicit values are what gets emitted into the call, so they
/// must not be renumbered.
enum RTCancelKind {
  /// Initial value used before a region kind has been selected.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
4615 
4616 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4617   RTCancelKind CancelKind = CancelNoreq;
4618   if (CancelRegion == OMPD_parallel)
4619     CancelKind = CancelParallel;
4620   else if (CancelRegion == OMPD_for)
4621     CancelKind = CancelLoop;
4622   else if (CancelRegion == OMPD_sections)
4623     CancelKind = CancelSections;
4624   else {
4625     assert(CancelRegion == OMPD_taskgroup);
4626     CancelKind = CancelTaskgroup;
4627   }
4628   return CancelKind;
4629 }
4630 
4631 void CGOpenMPRuntime::emitCancellationPointCall(
4632     CodeGenFunction &CGF, SourceLocation Loc,
4633     OpenMPDirectiveKind CancelRegion) {
4634   if (!CGF.HaveInsertPoint())
4635     return;
4636   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4637   // global_tid, kmp_int32 cncl_kind);
4638   if (auto *OMPRegionInfo =
4639           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4640     if (OMPRegionInfo->hasCancel()) {
4641       llvm::Value *Args[] = {
4642           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4643           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4644       // Ignore return result until untied tasks are supported.
4645       auto *Result = CGF.EmitRuntimeCall(
4646           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4647       // if (__kmpc_cancellationpoint()) {
4648       //  __kmpc_cancel_barrier();
4649       //   exit from construct;
4650       // }
4651       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4652       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4653       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4654       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4655       CGF.EmitBlock(ExitBB);
4656       // __kmpc_cancel_barrier();
4657       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4658       // exit from construct;
4659       auto CancelDest =
4660           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4661       CGF.EmitBranchThroughCleanup(CancelDest);
4662       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4663     }
4664   }
4665 }
4666 
4667 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4668                                      const Expr *IfCond,
4669                                      OpenMPDirectiveKind CancelRegion) {
4670   if (!CGF.HaveInsertPoint())
4671     return;
4672   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4673   // kmp_int32 cncl_kind);
4674   if (auto *OMPRegionInfo =
4675           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4676     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4677                                                         PrePostActionTy &) {
4678       auto &RT = CGF.CGM.getOpenMPRuntime();
4679       llvm::Value *Args[] = {
4680           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4681           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4682       // Ignore return result until untied tasks are supported.
4683       auto *Result = CGF.EmitRuntimeCall(
4684           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4685       // if (__kmpc_cancel()) {
4686       //  __kmpc_cancel_barrier();
4687       //   exit from construct;
4688       // }
4689       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4690       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4691       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4692       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4693       CGF.EmitBlock(ExitBB);
4694       // __kmpc_cancel_barrier();
4695       RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4696       // exit from construct;
4697       auto CancelDest =
4698           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4699       CGF.EmitBranchThroughCleanup(CancelDest);
4700       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4701     };
4702     if (IfCond)
4703       emitOMPIfClause(CGF, IfCond, ThenGen,
4704                       [](CodeGenFunction &, PrePostActionTy &) {});
4705     else {
4706       RegionCodeGenTy ThenRCG(ThenGen);
4707       ThenRCG(CGF);
4708     }
4709   }
4710 }
4711 
4712 /// \brief Obtain information that uniquely identifies a target entry. This
4713 /// consists of the file and device IDs as well as line number associated with
4714 /// the relevant entry source location.
4715 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4716                                      unsigned &DeviceID, unsigned &FileID,
4717                                      unsigned &LineNum) {
4718 
4719   auto &SM = C.getSourceManager();
4720 
4721   // The loc should be always valid and have a file ID (the user cannot use
4722   // #pragma directives in macros)
4723 
4724   assert(Loc.isValid() && "Source location is expected to be always valid.");
4725   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4726 
4727   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4728   assert(PLoc.isValid() && "Source location is expected to be always valid.");
4729 
4730   llvm::sys::fs::UniqueID ID;
4731   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4732     llvm_unreachable("Source file with target region no longer exists!");
4733 
4734   DeviceID = ID.getDevice();
4735   FileID = ID.getFile();
4736   LineNum = PLoc.getLine();
4737 }
4738 
/// \brief Emit the outlined function for the target directive \a D. This
/// entry point only checks that \a ParentName is not empty and then forwards
/// all of its arguments, unchanged, to emitTargetOutlinedFunctionHelper().
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // The parent name is part of the generated entry name, so it must be set.
  assert(!ParentName.empty() && "Invalid target region parent name!");

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
4748 
4749 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4750     const OMPExecutableDirective &D, StringRef ParentName,
4751     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4752     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4753   // Create a unique name for the entry function using the source location
4754   // information of the current target region. The name will be something like:
4755   //
4756   // __omp_offloading_DD_FFFF_PP_lBB
4757   //
4758   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4759   // mangled name of the function that encloses the target region and BB is the
4760   // line number of the target region.
4761 
4762   unsigned DeviceID;
4763   unsigned FileID;
4764   unsigned Line;
4765   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4766                            Line);
4767   SmallString<64> EntryFnName;
4768   {
4769     llvm::raw_svector_ostream OS(EntryFnName);
4770     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4771        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4772   }
4773 
4774   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4775 
4776   CodeGenFunction CGF(CGM, true);
4777   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4778   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4779 
4780   OutlinedFn =
4781       CGF.GenerateOpenMPCapturedStmtFunction(CS, /*CastValToPtr=*/true);
4782 
4783   // If this target outline function is not an offload entry, we don't need to
4784   // register it.
4785   if (!IsOffloadEntry)
4786     return;
4787 
4788   // The target region ID is used by the runtime library to identify the current
4789   // target region, so it only has to be unique and not necessarily point to
4790   // anything. It could be the pointer to the outlined function that implements
4791   // the target region, but we aren't using that so that the compiler doesn't
4792   // need to keep that, and could therefore inline the host function if proven
4793   // worthwhile during optimization. In the other hand, if emitting code for the
4794   // device, the ID has to be the function address so that it can retrieved from
4795   // the offloading entry and launched by the runtime library. We also mark the
4796   // outlined function to have external linkage in case we are emitting code for
4797   // the device, because these functions will be entry points to the device.
4798 
4799   if (CGM.getLangOpts().OpenMPIsDevice) {
4800     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4801     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4802   } else
4803     OutlinedFnID = new llvm::GlobalVariable(
4804         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4805         llvm::GlobalValue::PrivateLinkage,
4806         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4807 
4808   // Register the information for the entry associated with this target region.
4809   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4810       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4811 }
4812 
4813 /// discard all CompoundStmts intervening between two constructs
4814 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
4815   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
4816     Body = CS->body_front();
4817 
4818   return Body;
4819 }
4820 
4821 /// \brief Emit the num_teams clause of an enclosed teams directive at the
4822 /// target region scope. If there is no teams directive associated with the
4823 /// target directive, or if there is no num_teams clause associated with the
4824 /// enclosed teams directive, return nullptr.
4825 static llvm::Value *
4826 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4827                                      CodeGenFunction &CGF,
4828                                      const OMPExecutableDirective &D) {
4829 
4830   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4831                                               "teams directive expected to be "
4832                                               "emitted only for the host!");
4833 
4834   // FIXME: For the moment we do not support combined directives with target and
4835   // teams, so we do not expect to get any num_teams clause in the provided
4836   // directive. Once we support that, this assertion can be replaced by the
4837   // actual emission of the clause expression.
4838   assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4839          "Not expecting clause in directive.");
4840 
4841   // If the current target region has a teams region enclosed, we need to get
4842   // the number of teams to pass to the runtime function call. This is done
4843   // by generating the expression in a inlined region. This is required because
4844   // the expression is captured in the enclosing target environment when the
4845   // teams directive is not combined with target.
4846 
4847   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4848 
4849   // FIXME: Accommodate other combined directives with teams when they become
4850   // available.
4851   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4852           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4853     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4854       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4855       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4856       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4857       return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4858                                        /*IsSigned=*/true);
4859     }
4860 
4861     // If we have an enclosed teams directive but no num_teams clause we use
4862     // the default value 0.
4863     return CGF.Builder.getInt32(0);
4864   }
4865 
4866   // No teams associated with the directive.
4867   return nullptr;
4868 }
4869 
4870 /// \brief Emit the thread_limit clause of an enclosed teams directive at the
4871 /// target region scope. If there is no teams directive associated with the
4872 /// target directive, or if there is no thread_limit clause associated with the
4873 /// enclosed teams directive, return nullptr.
4874 static llvm::Value *
4875 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4876                                         CodeGenFunction &CGF,
4877                                         const OMPExecutableDirective &D) {
4878 
4879   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4880                                               "teams directive expected to be "
4881                                               "emitted only for the host!");
4882 
4883   // FIXME: For the moment we do not support combined directives with target and
4884   // teams, so we do not expect to get any thread_limit clause in the provided
4885   // directive. Once we support that, this assertion can be replaced by the
4886   // actual emission of the clause expression.
4887   assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4888          "Not expecting clause in directive.");
4889 
4890   // If the current target region has a teams region enclosed, we need to get
4891   // the thread limit to pass to the runtime function call. This is done
4892   // by generating the expression in a inlined region. This is required because
4893   // the expression is captured in the enclosing target environment when the
4894   // teams directive is not combined with target.
4895 
4896   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4897 
4898   // FIXME: Accommodate other combined directives with teams when they become
4899   // available.
4900   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4901           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4902     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4903       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4904       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4905       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4906       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4907                                        /*IsSigned=*/true);
4908     }
4909 
4910     // If we have an enclosed teams directive but no thread_limit clause we use
4911     // the default value 0.
4912     return CGF.Builder.getInt32(0);
4913   }
4914 
4915   // No teams associated with the directive.
4916   return nullptr;
4917 }
4918 
4919 namespace {
/// \brief Utility to handle information from clauses associated with a given
/// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
/// It provides a convenient interface to obtain the information and generate
/// code for that information.
4924 class MappableExprsHandler {
4925 public:
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading. Each enumerator is a distinct bit, so the values can be
  /// OR'ed together into a single flags word.
  /// NOTE(review): bit 0x40 is not assigned here - presumably reserved;
  /// confirm against the runtime library.
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// \brief The element being mapped is a pointer, therefore the pointee
    /// should be mapped as well.
    OMP_MAP_IS_PTR = 0x10,
    /// \brief This flag signals that an argument is the first one relating to
    /// a map/private clause expression. For some cases a single
    /// map/privatization results in multiple arguments passed to the runtime
    /// library.
    OMP_MAP_FIRST_REF = 0x20,
    /// \brief This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE_PTR = 0x80,
    /// \brief Pass the element to the device by value.
    OMP_MAP_PRIVATE_VAL = 0x100,
  };
4953 
4954   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
4955   typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
4956 
4957 private:
4958   /// \brief Directive from where the map clauses were extracted.
4959   const OMPExecutableDirective &Directive;
4960 
4961   /// \brief Function the directive is being generated for.
4962   CodeGenFunction &CGF;
4963 
4964   /// \brief Set of all first private variables in the current directive.
4965   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
4966 
4967   llvm::Value *getExprTypeSize(const Expr *E) const {
4968     auto ExprTy = E->getType().getCanonicalType();
4969 
4970     // Reference types are ignored for mapping purposes.
4971     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
4972       ExprTy = RefTy->getPointeeType().getCanonicalType();
4973 
4974     // Given that an array section is considered a built-in type, we need to
4975     // do the calculation based on the length of the section instead of relying
4976     // on CGF.getTypeSize(E->getType()).
4977     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
4978       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
4979                             OAE->getBase()->IgnoreParenImpCasts())
4980                             .getCanonicalType();
4981 
4982       // If there is no length associated with the expression, that means we
4983       // are using the whole length of the base.
4984       if (!OAE->getLength() && OAE->getColonLoc().isValid())
4985         return CGF.getTypeSize(BaseTy);
4986 
4987       llvm::Value *ElemSize;
4988       if (auto *PTy = BaseTy->getAs<PointerType>())
4989         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
4990       else {
4991         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
4992         assert(ATy && "Expecting array type if not a pointer type.");
4993         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
4994       }
4995 
4996       // If we don't have a length at this point, that is because we have an
4997       // array section with a single element.
4998       if (!OAE->getLength())
4999         return ElemSize;
5000 
5001       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
5002       LengthVal =
5003           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
5004       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
5005     }
5006     return CGF.getTypeSize(ExprTy);
5007   }
5008 
5009   /// \brief Return the corresponding bits for a given map clause modifier. Add
5010   /// a flag marking the map as a pointer if requested. Add a flag marking the
5011   /// map as the first one of a series of maps that relate to the same map
5012   /// expression.
5013   unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
5014                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
5015                           bool AddIsFirstFlag) const {
5016     unsigned Bits = 0u;
5017     switch (MapType) {
5018     case OMPC_MAP_alloc:
5019     case OMPC_MAP_release:
5020       // alloc and release is the default behavior in the runtime library,  i.e.
5021       // if we don't pass any bits alloc/release that is what the runtime is
5022       // going to do. Therefore, we don't need to signal anything for these two
5023       // type modifiers.
5024       break;
5025     case OMPC_MAP_to:
5026       Bits = OMP_MAP_TO;
5027       break;
5028     case OMPC_MAP_from:
5029       Bits = OMP_MAP_FROM;
5030       break;
5031     case OMPC_MAP_tofrom:
5032       Bits = OMP_MAP_TO | OMP_MAP_FROM;
5033       break;
5034     case OMPC_MAP_delete:
5035       Bits = OMP_MAP_DELETE;
5036       break;
5037     default:
5038       llvm_unreachable("Unexpected map type!");
5039       break;
5040     }
5041     if (AddPtrFlag)
5042       Bits |= OMP_MAP_IS_PTR;
5043     if (AddIsFirstFlag)
5044       Bits |= OMP_MAP_FIRST_REF;
5045     if (MapTypeModifier == OMPC_MAP_always)
5046       Bits |= OMP_MAP_ALWAYS;
5047     return Bits;
5048   }
5049 
5050   /// \brief Return true if the provided expression is a final array section. A
5051   /// final array section, is one whose length can't be proved to be one.
5052   bool isFinalArraySectionExpression(const Expr *E) const {
5053     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
5054 
5055     // It is not an array section and therefore not a unity-size one.
5056     if (!OASE)
5057       return false;
5058 
5059     // An array section with no colon always refer to a single element.
5060     if (OASE->getColonLoc().isInvalid())
5061       return false;
5062 
5063     auto *Length = OASE->getLength();
5064 
5065     // If we don't have a length we have to check if the array has size 1
5066     // for this dimension. Also, we should always expect a length if the
5067     // base type is pointer.
5068     if (!Length) {
5069       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
5070                          OASE->getBase()->IgnoreParenImpCasts())
5071                          .getCanonicalType();
5072       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
5073         return ATy->getSize().getSExtValue() != 1;
5074       // If we don't have a constant dimension length, we have to consider
5075       // the current section as having any size, so it is not necessarily
5076       // unitary. If it happen to be unity size, that's user fault.
5077       return true;
5078     }
5079 
5080     // Check if the length evaluates to 1.
5081     llvm::APSInt ConstLength;
5082     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
5083       return true; // Can have more that size 1.
5084 
5085     return ConstLength.getSExtValue() != 1;
5086   }
5087 
  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// Entries are appended to \a BasePointers, \a Pointers, \a Sizes and
  /// \a Types in lockstep (one element is pushed to each array per generated
  /// entry); the arrays are not cleared here.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22])
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
    // extra_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;

    // Scan the components from the base to the complete expression. The list
    // is walked with reverse iterators, so the base is visited first.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
               .getPointer();

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference.
      if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() &&
          std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(
            BP, I->getAssociatedDeclaration()->getType());
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         I->getAssociatedDeclaration()
                                             ->getType()
                                             ->getAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      // Components that match none of the conditions below produce no entry
      // and are simply skipped.
      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // Save the base we are currently using.
        BasePointers.push_back(BP);

        auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        Pointers.push_back(LB);
        Sizes.push_back(Size);
        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo,
                                       IsCaptureFirstInfo));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        continue;
      }
    }
  }
5318 
5319   /// \brief Return the adjusted map modifiers if the declaration a capture
5320   /// refers to appears in a first-private clause. This is expected to be used
5321   /// only with directives that start with 'target'.
5322   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
5323                                                unsigned CurrentModifiers) {
5324     assert(Cap.capturesVariable() && "Expected capture by reference only!");
5325 
5326     // A first private variable captured by reference will use only the
5327     // 'private ptr' and 'map to' flag. Return the right flags if the captured
5328     // declaration is known as first-private in this handler.
5329     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
5330       return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
5331              MappableExprsHandler::OMP_MAP_TO;
5332 
5333     // We didn't modify anything.
5334     return CurrentModifiers;
5335   }
5336 
5337 public:
5338   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
5339       : Directive(Dir), CGF(CGF) {
5340     // Extract firstprivate clause information.
5341     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
5342       for (const auto *D : C->varlists())
5343         FirstPrivateDecls.insert(
5344             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
5345   }
5346 
5347   /// \brief Generate all the base pointers, section pointers, sizes and map
5348   /// types for the extracted mappable expressions.
5349   void generateAllInfo(MapValuesArrayTy &BasePointers,
5350                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
5351                        MapFlagsArrayTy &Types) const {
5352     BasePointers.clear();
5353     Pointers.clear();
5354     Sizes.clear();
5355     Types.clear();
5356 
5357     struct MapInfo {
5358       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
5359       OpenMPMapClauseKind MapType;
5360       OpenMPMapClauseKind MapTypeModifier;
5361     };
5362 
5363     // We have to process the component lists that relate with the same
5364     // declaration in a single chunk so that we can generate the map flags
5365     // correctly. Therefore, we organize all lists in a map.
5366     llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
5367 
5368     // Helper function to fill the information map for the different supported
5369     // clauses.
5370     auto &&InfoGen =
5371         [&Info](const ValueDecl *D,
5372                 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
5373                 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) {
5374           const ValueDecl *VD =
5375               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
5376           Info[VD].push_back({L, MapType, MapModifier});
5377         };
5378 
5379     for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5380       for (auto L : C->component_lists())
5381         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier());
5382     for (auto *C : Directive.getClausesOfKind<OMPToClause>())
5383       for (auto L : C->component_lists())
5384         InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown);
5385     for (auto *C : Directive.getClausesOfKind<OMPFromClause>())
5386       for (auto L : C->component_lists())
5387         InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown);
5388 
5389     for (auto &M : Info) {
5390       // We need to know when we generate information for the first component
5391       // associated with a capture, because the mapping flags depend on it.
5392       bool IsFirstComponentList = true;
5393       for (MapInfo &L : M.second) {
5394         assert(!L.Components.empty() &&
5395                "Not expecting declaration with no component lists.");
5396         generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
5397                                      BasePointers, Pointers, Sizes, Types,
5398                                      IsFirstComponentList);
5399         IsFirstComponentList = false;
5400       }
5401     }
5402   }
5403 
5404   /// \brief Generate the base pointers, section pointers, sizes and map types
5405   /// associated to a given capture.
5406   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
5407                               MapValuesArrayTy &BasePointers,
5408                               MapValuesArrayTy &Pointers,
5409                               MapValuesArrayTy &Sizes,
5410                               MapFlagsArrayTy &Types) const {
5411     assert(!Cap->capturesVariableArrayType() &&
5412            "Not expecting to generate map info for a variable array type!");
5413 
5414     BasePointers.clear();
5415     Pointers.clear();
5416     Sizes.clear();
5417     Types.clear();
5418 
5419     const ValueDecl *VD =
5420         Cap->capturesThis()
5421             ? nullptr
5422             : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
5423 
5424     // We need to know when we generating information for the first component
5425     // associated with a capture, because the mapping flags depend on it.
5426     bool IsFirstComponentList = true;
5427     for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5428       for (auto L : C->decl_component_lists(VD)) {
5429         assert(L.first == VD &&
5430                "We got information for the wrong declaration??");
5431         assert(!L.second.empty() &&
5432                "Not expecting declaration with no component lists.");
5433         generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
5434                                      L.second, BasePointers, Pointers, Sizes,
5435                                      Types, IsFirstComponentList);
5436         IsFirstComponentList = false;
5437       }
5438 
5439     return;
5440   }
5441 
5442   /// \brief Generate the default map information for a given capture \a CI,
5443   /// record field declaration \a RI and captured value \a CV.
5444   void generateDefaultMapInfo(
5445       const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV,
5446       MappableExprsHandler::MapValuesArrayTy &CurBasePointers,
5447       MappableExprsHandler::MapValuesArrayTy &CurPointers,
5448       MappableExprsHandler::MapValuesArrayTy &CurSizes,
5449       MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) {
5450     auto &Ctx = CGF.getContext();
5451 
5452     // Do the default mapping.
5453     if (CI.capturesThis()) {
5454       CurBasePointers.push_back(CV);
5455       CurPointers.push_back(CV);
5456       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
5457       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
5458       // Default map type.
5459       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO |
5460                             MappableExprsHandler::OMP_MAP_FROM);
5461     } else if (CI.capturesVariableByCopy()) {
5462       if (!RI.getType()->isAnyPointerType()) {
5463         // If the field is not a pointer, we need to save the actual value
5464         // and load it as a void pointer.
5465         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL);
5466         auto DstAddr = CGF.CreateMemTemp(Ctx.getUIntPtrType(),
5467                                          Twine(CI.getCapturedVar()->getName()) +
5468                                              ".casted");
5469         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
5470 
5471         auto *SrcAddrVal = CGF.EmitScalarConversion(
5472             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
5473             Ctx.getPointerType(RI.getType()), SourceLocation());
5474         LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI.getType());
5475 
5476         // Store the value using the source type pointer.
5477         CGF.EmitStoreThroughLValue(RValue::get(CV), SrcLV);
5478 
5479         // Load the value using the destination type pointer.
5480         CurBasePointers.push_back(
5481             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal());
5482         CurPointers.push_back(CurBasePointers.back());
5483 
5484         // Get the size of the type to be used in the map.
5485         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
5486       } else {
5487         // Pointers are implicitly mapped with a zero size and no flags
5488         // (other than first map that is added for all implicit maps).
5489         CurMapTypes.push_back(0u);
5490         CurBasePointers.push_back(CV);
5491         CurPointers.push_back(CV);
5492         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
5493       }
5494     } else {
5495       assert(CI.capturesVariable() && "Expected captured reference.");
5496       CurBasePointers.push_back(CV);
5497       CurPointers.push_back(CV);
5498 
5499       const ReferenceType *PtrTy =
5500           cast<ReferenceType>(RI.getType().getTypePtr());
5501       QualType ElementType = PtrTy->getPointeeType();
5502       CurSizes.push_back(CGF.getTypeSize(ElementType));
5503       // The default map type for a scalar/complex type is 'to' because by
5504       // default the value doesn't have to be retrieved. For an aggregate
5505       // type, the default is 'tofrom'.
5506       CurMapTypes.push_back(ElementType->isAggregateType()
5507                                 ? (MappableExprsHandler::OMP_MAP_TO |
5508                                    MappableExprsHandler::OMP_MAP_FROM)
5509                                 : MappableExprsHandler::OMP_MAP_TO);
5510 
5511       // If we have a capture by reference we may need to add the private
5512       // pointer flag if the base declaration shows in some first-private
5513       // clause.
5514       CurMapTypes.back() =
5515           adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
5516     }
5517     // Every default map produces a single argument, so, it is always the
5518     // first one.
5519     CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF;
5520   }
5521 };
5522 
/// \brief Device IDs with a reserved meaning in the offloading runtime calls
/// emitted by this file.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec. This is the value passed when
  /// the target call is emitted without a device expression.
  OMP_DEVICEID_UNDEF = -1,
};
5528 } // anonymous namespace
5529 
5530 /// \brief Emit the arrays used to pass the captures and map information to the
5531 /// offloading runtime library. If there is no map or capture information,
5532 /// return nullptr by reference.
5533 static void
5534 emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
5535                      llvm::Value *&PointersArray, llvm::Value *&SizesArray,
5536                      llvm::Value *&MapTypesArray,
5537                      MappableExprsHandler::MapValuesArrayTy &BasePointers,
5538                      MappableExprsHandler::MapValuesArrayTy &Pointers,
5539                      MappableExprsHandler::MapValuesArrayTy &Sizes,
5540                      MappableExprsHandler::MapFlagsArrayTy &MapTypes) {
5541   auto &CGM = CGF.CGM;
5542   auto &Ctx = CGF.getContext();
5543 
5544   BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr;
5545 
5546   if (unsigned PointerNumVal = BasePointers.size()) {
5547     // Detect if we have any capture size requiring runtime evaluation of the
5548     // size so that a constant array could be eventually used.
5549     bool hasRuntimeEvaluationCaptureSize = false;
5550     for (auto *S : Sizes)
5551       if (!isa<llvm::Constant>(S)) {
5552         hasRuntimeEvaluationCaptureSize = true;
5553         break;
5554       }
5555 
5556     llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
5557     QualType PointerArrayType =
5558         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
5559                                  /*IndexTypeQuals=*/0);
5560 
5561     BasePointersArray =
5562         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
5563     PointersArray =
5564         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
5565 
5566     // If we don't have any VLA types or other types that require runtime
5567     // evaluation, we can use a constant array for the map sizes, otherwise we
5568     // need to fill up the arrays as we do for the pointers.
5569     if (hasRuntimeEvaluationCaptureSize) {
5570       QualType SizeArrayType = Ctx.getConstantArrayType(
5571           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
5572           /*IndexTypeQuals=*/0);
5573       SizesArray =
5574           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
5575     } else {
5576       // We expect all the sizes to be constant, so we collect them to create
5577       // a constant array.
5578       SmallVector<llvm::Constant *, 16> ConstSizes;
5579       for (auto S : Sizes)
5580         ConstSizes.push_back(cast<llvm::Constant>(S));
5581 
5582       auto *SizesArrayInit = llvm::ConstantArray::get(
5583           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
5584       auto *SizesArrayGbl = new llvm::GlobalVariable(
5585           CGM.getModule(), SizesArrayInit->getType(),
5586           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5587           SizesArrayInit, ".offload_sizes");
5588       SizesArrayGbl->setUnnamedAddr(true);
5589       SizesArray = SizesArrayGbl;
5590     }
5591 
5592     // The map types are always constant so we don't need to generate code to
5593     // fill arrays. Instead, we create an array constant.
5594     llvm::Constant *MapTypesArrayInit =
5595         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
5596     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
5597         CGM.getModule(), MapTypesArrayInit->getType(),
5598         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5599         MapTypesArrayInit, ".offload_maptypes");
5600     MapTypesArrayGbl->setUnnamedAddr(true);
5601     MapTypesArray = MapTypesArrayGbl;
5602 
5603     for (unsigned i = 0; i < PointerNumVal; ++i) {
5604       llvm::Value *BPVal = BasePointers[i];
5605       if (BPVal->getType()->isPointerTy())
5606         BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
5607       else {
5608         assert(BPVal->getType()->isIntegerTy() &&
5609                "If not a pointer, the value type must be an integer.");
5610         BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
5611       }
5612       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
5613           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
5614           0, i);
5615       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5616       CGF.Builder.CreateStore(BPVal, BPAddr);
5617 
5618       llvm::Value *PVal = Pointers[i];
5619       if (PVal->getType()->isPointerTy())
5620         PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
5621       else {
5622         assert(PVal->getType()->isIntegerTy() &&
5623                "If not a pointer, the value type must be an integer.");
5624         PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
5625       }
5626       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
5627           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
5628           i);
5629       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5630       CGF.Builder.CreateStore(PVal, PAddr);
5631 
5632       if (hasRuntimeEvaluationCaptureSize) {
5633         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
5634             llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
5635             /*Idx0=*/0,
5636             /*Idx1=*/i);
5637         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
5638         CGF.Builder.CreateStore(
5639             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
5640             SAddr);
5641       }
5642     }
5643   }
5644 }
5645 /// \brief Emit the arguments to be passed to the runtime library based on the
5646 /// arrays of pointers, sizes and map types.
5647 static void emitOffloadingArraysArgument(
5648     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
5649     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
5650     llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray,
5651     llvm::Value *PointersArray, llvm::Value *SizesArray,
5652     llvm::Value *MapTypesArray, unsigned NumElems) {
5653   auto &CGM = CGF.CGM;
5654   if (NumElems) {
5655     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5656         llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray,
5657         /*Idx0=*/0, /*Idx1=*/0);
5658     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5659         llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray,
5660         /*Idx0=*/0,
5661         /*Idx1=*/0);
5662     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5663         llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray,
5664         /*Idx0=*/0, /*Idx1=*/0);
5665     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5666         llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray,
5667         /*Idx0=*/0,
5668         /*Idx1=*/0);
5669   } else {
5670     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5671     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5672     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
5673     MapTypesArrayArg =
5674         llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
5675   }
5676 }
5677 
5678 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
5679                                      const OMPExecutableDirective &D,
5680                                      llvm::Value *OutlinedFn,
5681                                      llvm::Value *OutlinedFnID,
5682                                      const Expr *IfCond, const Expr *Device,
5683                                      ArrayRef<llvm::Value *> CapturedVars) {
5684   if (!CGF.HaveInsertPoint())
5685     return;
5686 
5687   assert(OutlinedFn && "Invalid outlined function!");
5688 
5689   auto &Ctx = CGF.getContext();
5690 
5691   // Fill up the arrays with all the captured variables.
5692   MappableExprsHandler::MapValuesArrayTy KernelArgs;
5693   MappableExprsHandler::MapValuesArrayTy BasePointers;
5694   MappableExprsHandler::MapValuesArrayTy Pointers;
5695   MappableExprsHandler::MapValuesArrayTy Sizes;
5696   MappableExprsHandler::MapFlagsArrayTy MapTypes;
5697 
5698   MappableExprsHandler::MapValuesArrayTy CurBasePointers;
5699   MappableExprsHandler::MapValuesArrayTy CurPointers;
5700   MappableExprsHandler::MapValuesArrayTy CurSizes;
5701   MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
5702 
5703   // Get mappable expression information.
5704   MappableExprsHandler MEHandler(D, CGF);
5705 
5706   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5707   auto RI = CS.getCapturedRecordDecl()->field_begin();
5708   auto CV = CapturedVars.begin();
5709   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
5710                                             CE = CS.capture_end();
5711        CI != CE; ++CI, ++RI, ++CV) {
5712     StringRef Name;
5713     QualType Ty;
5714 
5715     CurBasePointers.clear();
5716     CurPointers.clear();
5717     CurSizes.clear();
5718     CurMapTypes.clear();
5719 
5720     // VLA sizes are passed to the outlined region by copy and do not have map
5721     // information associated.
5722     if (CI->capturesVariableArrayType()) {
5723       CurBasePointers.push_back(*CV);
5724       CurPointers.push_back(*CV);
5725       CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5726       // Copy to the device as an argument. No need to retrieve it.
5727       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
5728                             MappableExprsHandler::OMP_MAP_FIRST_REF);
5729     } else {
5730       // If we have any information in the map clause, we use it, otherwise we
5731       // just do a default mapping.
5732       MEHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers,
5733                                        CurSizes, CurMapTypes);
5734       if (CurBasePointers.empty())
5735         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
5736                                          CurPointers, CurSizes, CurMapTypes);
5737     }
5738     // We expect to have at least an element of information for this capture.
5739     assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
5740     assert(CurBasePointers.size() == CurPointers.size() &&
5741            CurBasePointers.size() == CurSizes.size() &&
5742            CurBasePointers.size() == CurMapTypes.size() &&
5743            "Inconsistent map information sizes!");
5744 
5745     // The kernel args are always the first elements of the base pointers
5746     // associated with a capture.
5747     KernelArgs.push_back(CurBasePointers.front());
5748     // We need to append the results of this capture to what we already have.
5749     BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
5750     Pointers.append(CurPointers.begin(), CurPointers.end());
5751     Sizes.append(CurSizes.begin(), CurSizes.end());
5752     MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
5753   }
5754 
5755   // Keep track on whether the host function has to be executed.
5756   auto OffloadErrorQType =
5757       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
5758   auto OffloadError = CGF.MakeAddrLValue(
5759       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
5760       OffloadErrorQType);
5761   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
5762                         OffloadError);
5763 
5764   // Fill up the pointer arrays and transfer execution to the device.
5765   auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
5766                     OutlinedFnID, OffloadError, OffloadErrorQType,
5767                     &D](CodeGenFunction &CGF, PrePostActionTy &) {
5768     auto &RT = CGF.CGM.getOpenMPRuntime();
5769     // Emit the offloading arrays.
5770     llvm::Value *BasePointersArray;
5771     llvm::Value *PointersArray;
5772     llvm::Value *SizesArray;
5773     llvm::Value *MapTypesArray;
5774     emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
5775                          MapTypesArray, BasePointers, Pointers, Sizes,
5776                          MapTypes);
5777     emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
5778                                  SizesArray, MapTypesArray, BasePointersArray,
5779                                  PointersArray, SizesArray, MapTypesArray,
5780                                  BasePointers.size());
5781 
5782     // On top of the arrays that were filled up, the target offloading call
5783     // takes as arguments the device id as well as the host pointer. The host
5784     // pointer is used by the runtime library to identify the current target
5785     // region, so it only has to be unique and not necessarily point to
5786     // anything. It could be the pointer to the outlined function that
5787     // implements the target region, but we aren't using that so that the
5788     // compiler doesn't need to keep that, and could therefore inline the host
5789     // function if proven worthwhile during optimization.
5790 
5791     // From this point on, we need to have an ID of the target region defined.
5792     assert(OutlinedFnID && "Invalid outlined function ID!");
5793 
5794     // Emit device ID if any.
5795     llvm::Value *DeviceID;
5796     if (Device)
5797       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5798                                            CGF.Int32Ty, /*isSigned=*/true);
5799     else
5800       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
5801 
5802     // Emit the number of elements in the offloading arrays.
5803     llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
5804 
5805     // Return value of the runtime offloading call.
5806     llvm::Value *Return;
5807 
5808     auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
5809     auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
5810 
5811     // If we have NumTeams defined this means that we have an enclosed teams
5812     // region. Therefore we also expect to have ThreadLimit defined. These two
5813     // values should be defined in the presence of a teams directive, regardless
5814     // of having any clauses associated. If the user is using teams but no
5815     // clauses, these two values will be the default that should be passed to
5816     // the runtime library - a 32-bit integer with the value zero.
5817     if (NumTeams) {
5818       assert(ThreadLimit && "Thread limit expression should be available along "
5819                             "with number of teams.");
5820       llvm::Value *OffloadingArgs[] = {
5821           DeviceID,          OutlinedFnID,  PointerNum,
5822           BasePointersArray, PointersArray, SizesArray,
5823           MapTypesArray,     NumTeams,      ThreadLimit};
5824       Return = CGF.EmitRuntimeCall(
5825           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
5826     } else {
5827       llvm::Value *OffloadingArgs[] = {
5828           DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
5829           PointersArray, SizesArray,   MapTypesArray};
5830       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
5831                                    OffloadingArgs);
5832     }
5833 
5834     CGF.EmitStoreOfScalar(Return, OffloadError);
5835   };
5836 
5837   // Notify that the host version must be executed.
5838   auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
5839     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
5840                           OffloadError);
5841   };
5842 
5843   // If we have a target function ID it means that we need to support
5844   // offloading, otherwise, just execute on the host. We need to execute on host
5845   // regardless of the conditional in the if clause if, e.g., the user do not
5846   // specify target triples.
5847   if (OutlinedFnID) {
5848     if (IfCond)
5849       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
5850     else {
5851       RegionCodeGenTy ThenRCG(ThenGen);
5852       ThenRCG(CGF);
5853     }
5854   } else {
5855     RegionCodeGenTy ElseRCG(ElseGen);
5856     ElseRCG(CGF);
5857   }
5858 
5859   // Check the error code and execute the host version if required.
5860   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
5861   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
5862   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
5863   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
5864   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
5865 
5866   CGF.EmitBlock(OffloadFailedBlock);
5867   CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
5868   CGF.EmitBranch(OffloadContBlock);
5869 
5870   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
5871 }
5872 
5873 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
5874                                                     StringRef ParentName) {
5875   if (!S)
5876     return;
5877 
5878   // If we find a OMP target directive, codegen the outline function and
5879   // register the result.
5880   // FIXME: Add other directives with target when they become supported.
5881   bool isTargetDirective = isa<OMPTargetDirective>(S);
5882 
5883   if (isTargetDirective) {
5884     auto *E = cast<OMPExecutableDirective>(S);
5885     unsigned DeviceID;
5886     unsigned FileID;
5887     unsigned Line;
5888     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
5889                              FileID, Line);
5890 
5891     // Is this a target region that should not be emitted as an entry point? If
5892     // so just signal we are done with this target region.
5893     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
5894                                                             ParentName, Line))
5895       return;
5896 
5897     llvm::Function *Fn;
5898     llvm::Constant *Addr;
5899     std::tie(Fn, Addr) =
5900         CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
5901             CGM, cast<OMPTargetDirective>(*E), ParentName,
5902             /*isOffloadEntry=*/true);
5903     assert(Fn && Addr && "Target region emission failed.");
5904     return;
5905   }
5906 
5907   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
5908     if (!E->hasAssociatedStmt())
5909       return;
5910 
5911     scanForTargetRegionsFunctions(
5912         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
5913         ParentName);
5914     return;
5915   }
5916 
5917   // If this is a lambda function, look into its body.
5918   if (auto *L = dyn_cast<LambdaExpr>(S))
5919     S = L->getBody();
5920 
5921   // Keep looking for target regions recursively.
5922   for (auto *II : S->children())
5923     scanForTargetRegionsFunctions(II, ParentName);
5924 }
5925 
5926 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
5927   auto &FD = *cast<FunctionDecl>(GD.getDecl());
5928 
5929   // If emitting code for the host, we do not process FD here. Instead we do
5930   // the normal code generation.
5931   if (!CGM.getLangOpts().OpenMPIsDevice)
5932     return false;
5933 
5934   // Try to detect target regions in the function.
5935   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
5936 
5937   // We should not emit any function othen that the ones created during the
5938   // scanning. Therefore, we signal that this function is completely dealt
5939   // with.
5940   return true;
5941 }
5942 
5943 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
5944   if (!CGM.getLangOpts().OpenMPIsDevice)
5945     return false;
5946 
5947   // Check if there are Ctors/Dtors in this declaration and look for target
5948   // regions in it. We use the complete variant to produce the kernel name
5949   // mangling.
5950   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
5951   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
5952     for (auto *Ctor : RD->ctors()) {
5953       StringRef ParentName =
5954           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
5955       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
5956     }
5957     auto *Dtor = RD->getDestructor();
5958     if (Dtor) {
5959       StringRef ParentName =
5960           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
5961       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
5962     }
5963   }
5964 
5965   // If we are in target mode we do not emit any global (declare target is not
5966   // implemented yet). Therefore we signal that GD was processed in this case.
5967   return true;
5968 }
5969 
5970 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
5971   auto *VD = GD.getDecl();
5972   if (isa<FunctionDecl>(VD))
5973     return emitTargetFunctions(GD);
5974 
5975   return emitTargetGlobalVariable(GD);
5976 }
5977 
5978 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
5979   // If we have offloading in the current module, we need to emit the entries
5980   // now and register the offloading descriptor.
5981   createOffloadEntriesAndInfoMetadata();
5982 
5983   // Create and register the offloading binary descriptors. This is the main
5984   // entity that captures all the information about offloading in the current
5985   // compilation unit.
5986   return createOffloadingBinaryDescriptorRegistration();
5987 }
5988 
5989 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
5990                                     const OMPExecutableDirective &D,
5991                                     SourceLocation Loc,
5992                                     llvm::Value *OutlinedFn,
5993                                     ArrayRef<llvm::Value *> CapturedVars) {
5994   if (!CGF.HaveInsertPoint())
5995     return;
5996 
5997   auto *RTLoc = emitUpdateLocation(CGF, Loc);
5998   CodeGenFunction::RunCleanupsScope Scope(CGF);
5999 
6000   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
6001   llvm::Value *Args[] = {
6002       RTLoc,
6003       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
6004       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
6005   llvm::SmallVector<llvm::Value *, 16> RealArgs;
6006   RealArgs.append(std::begin(Args), std::end(Args));
6007   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
6008 
6009   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
6010   CGF.EmitRuntimeCall(RTLFn, RealArgs);
6011 }
6012 
6013 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
6014                                          const Expr *NumTeams,
6015                                          const Expr *ThreadLimit,
6016                                          SourceLocation Loc) {
6017   if (!CGF.HaveInsertPoint())
6018     return;
6019 
6020   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6021 
6022   llvm::Value *NumTeamsVal =
6023       (NumTeams)
6024           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
6025                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6026           : CGF.Builder.getInt32(0);
6027 
6028   llvm::Value *ThreadLimitVal =
6029       (ThreadLimit)
6030           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
6031                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6032           : CGF.Builder.getInt32(0);
6033 
6034   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
6035   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
6036                                      ThreadLimitVal};
6037   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
6038                       PushNumTeamsArgs);
6039 }
6040 
6041 void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
6042                                           const OMPExecutableDirective &D,
6043                                           const Expr *IfCond,
6044                                           const Expr *Device,
6045                                           const RegionCodeGenTy &CodeGen) {
6046 
6047   if (!CGF.HaveInsertPoint())
6048     return;
6049 
6050   llvm::Value *BasePointersArray = nullptr;
6051   llvm::Value *PointersArray = nullptr;
6052   llvm::Value *SizesArray = nullptr;
6053   llvm::Value *MapTypesArray = nullptr;
6054   unsigned NumOfPtrs = 0;
6055 
6056   // Generate the code for the opening of the data environment. Capture all the
6057   // arguments of the runtime call by reference because they are used in the
6058   // closing of the region.
6059   auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray,
6060                          &SizesArray, &MapTypesArray, Device,
6061                          &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
6062     // Fill up the arrays with all the mapped variables.
6063     MappableExprsHandler::MapValuesArrayTy BasePointers;
6064     MappableExprsHandler::MapValuesArrayTy Pointers;
6065     MappableExprsHandler::MapValuesArrayTy Sizes;
6066     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6067 
6068     // Get map clause information.
6069     MappableExprsHandler MCHandler(D, CGF);
6070     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6071     NumOfPtrs = BasePointers.size();
6072 
6073     // Fill up the arrays and create the arguments.
6074     emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
6075                          MapTypesArray, BasePointers, Pointers, Sizes,
6076                          MapTypes);
6077 
6078     llvm::Value *BasePointersArrayArg = nullptr;
6079     llvm::Value *PointersArrayArg = nullptr;
6080     llvm::Value *SizesArrayArg = nullptr;
6081     llvm::Value *MapTypesArrayArg = nullptr;
6082     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6083                                  SizesArrayArg, MapTypesArrayArg,
6084                                  BasePointersArray, PointersArray, SizesArray,
6085                                  MapTypesArray, NumOfPtrs);
6086 
6087     // Emit device ID if any.
6088     llvm::Value *DeviceID = nullptr;
6089     if (Device)
6090       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6091                                            CGF.Int32Ty, /*isSigned=*/true);
6092     else
6093       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6094 
6095     // Emit the number of elements in the offloading arrays.
6096     auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
6097 
6098     llvm::Value *OffloadingArgs[] = {
6099         DeviceID,         PointerNum,    BasePointersArrayArg,
6100         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6101     auto &RT = CGF.CGM.getOpenMPRuntime();
6102     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
6103                         OffloadingArgs);
6104   };
6105 
6106   // Generate code for the closing of the data region.
6107   auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray,
6108                        &MapTypesArray, Device,
6109                        &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
6110     assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
6111            NumOfPtrs && "Invalid data environment closing arguments.");
6112 
6113     llvm::Value *BasePointersArrayArg = nullptr;
6114     llvm::Value *PointersArrayArg = nullptr;
6115     llvm::Value *SizesArrayArg = nullptr;
6116     llvm::Value *MapTypesArrayArg = nullptr;
6117     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6118                                  SizesArrayArg, MapTypesArrayArg,
6119                                  BasePointersArray, PointersArray, SizesArray,
6120                                  MapTypesArray, NumOfPtrs);
6121 
6122     // Emit device ID if any.
6123     llvm::Value *DeviceID = nullptr;
6124     if (Device)
6125       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6126                                            CGF.Int32Ty, /*isSigned=*/true);
6127     else
6128       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6129 
6130     // Emit the number of elements in the offloading arrays.
6131     auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
6132 
6133     llvm::Value *OffloadingArgs[] = {
6134         DeviceID,         PointerNum,    BasePointersArrayArg,
6135         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6136     auto &RT = CGF.CGM.getOpenMPRuntime();
6137     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
6138                         OffloadingArgs);
6139   };
6140 
6141   // In the event we get an if clause, we don't have to take any action on the
6142   // else side.
6143   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6144 
6145   if (IfCond) {
6146     emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen);
6147   } else {
6148     RegionCodeGenTy BeginThenRCG(BeginThenGen);
6149     BeginThenRCG(CGF);
6150   }
6151 
6152   CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen);
6153 
6154   if (IfCond) {
6155     emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen);
6156   } else {
6157     RegionCodeGenTy EndThenRCG(EndThenGen);
6158     EndThenRCG(CGF);
6159   }
6160 }
6161 
6162 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
6163     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6164     const Expr *Device) {
6165   if (!CGF.HaveInsertPoint())
6166     return;
6167 
6168   assert((isa<OMPTargetEnterDataDirective>(D) ||
6169           isa<OMPTargetExitDataDirective>(D) ||
6170           isa<OMPTargetUpdateDirective>(D)) &&
6171          "Expecting either target enter, exit data, or update directives.");
6172 
6173   // Generate the code for the opening of the data environment.
6174   auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
6175     // Fill up the arrays with all the mapped variables.
6176     MappableExprsHandler::MapValuesArrayTy BasePointers;
6177     MappableExprsHandler::MapValuesArrayTy Pointers;
6178     MappableExprsHandler::MapValuesArrayTy Sizes;
6179     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6180 
6181     // Get map clause information.
6182     MappableExprsHandler MEHandler(D, CGF);
6183     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6184 
6185     llvm::Value *BasePointersArrayArg = nullptr;
6186     llvm::Value *PointersArrayArg = nullptr;
6187     llvm::Value *SizesArrayArg = nullptr;
6188     llvm::Value *MapTypesArrayArg = nullptr;
6189 
6190     // Fill up the arrays and create the arguments.
6191     emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg,
6192                          SizesArrayArg, MapTypesArrayArg, BasePointers,
6193                          Pointers, Sizes, MapTypes);
6194     emitOffloadingArraysArgument(
6195         CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6196         MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6197         MapTypesArrayArg, BasePointers.size());
6198 
6199     // Emit device ID if any.
6200     llvm::Value *DeviceID = nullptr;
6201     if (Device)
6202       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6203                                            CGF.Int32Ty, /*isSigned=*/true);
6204     else
6205       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6206 
6207     // Emit the number of elements in the offloading arrays.
6208     auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6209 
6210     llvm::Value *OffloadingArgs[] = {
6211         DeviceID,         PointerNum,    BasePointersArrayArg,
6212         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6213 
6214     auto &RT = CGF.CGM.getOpenMPRuntime();
6215     // Select the right runtime function call for each expected standalone
6216     // directive.
6217     OpenMPRTLFunction RTLFn;
6218     switch (D.getDirectiveKind()) {
6219     default:
6220       llvm_unreachable("Unexpected standalone target data directive.");
6221       break;
6222     case OMPD_target_enter_data:
6223       RTLFn = OMPRTL__tgt_target_data_begin;
6224       break;
6225     case OMPD_target_exit_data:
6226       RTLFn = OMPRTL__tgt_target_data_end;
6227       break;
6228     case OMPD_target_update:
6229       RTLFn = OMPRTL__tgt_target_data_update;
6230       break;
6231     }
6232     CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
6233   };
6234 
6235   // In the event we get an if clause, we don't have to take any action on the
6236   // else side.
6237   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6238 
6239   if (IfCond) {
6240     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6241   } else {
6242     RegionCodeGenTy ThenGenRCG(ThenGen);
6243     ThenGenRCG(CGF);
6244   }
6245 }
6246 
6247 namespace {
6248   /// Kind of parameter in a function with 'declare simd' directive.
6249   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
6250   /// Attribute set of the parameter.
6251   struct ParamAttrTy {
6252     ParamKindTy Kind = Vector;
6253     llvm::APSInt StrideOrArg;
6254     llvm::APSInt Alignment;
6255   };
6256 } // namespace
6257 
6258 static unsigned evaluateCDTSize(const FunctionDecl *FD,
6259                                 ArrayRef<ParamAttrTy> ParamAttrs) {
6260   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
6261   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
6262   // of that clause. The VLEN value must be power of 2.
6263   // In other case the notion of the function`s "characteristic data type" (CDT)
6264   // is used to compute the vector length.
6265   // CDT is defined in the following order:
6266   //   a) For non-void function, the CDT is the return type.
6267   //   b) If the function has any non-uniform, non-linear parameters, then the
6268   //   CDT is the type of the first such parameter.
6269   //   c) If the CDT determined by a) or b) above is struct, union, or class
6270   //   type which is pass-by-value (except for the type that maps to the
6271   //   built-in complex data type), the characteristic data type is int.
6272   //   d) If none of the above three cases is applicable, the CDT is int.
6273   // The VLEN is then determined based on the CDT and the size of vector
6274   // register of that ISA for which current vector version is generated. The
6275   // VLEN is computed using the formula below:
6276   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
6277   // where vector register size specified in section 3.2.1 Registers and the
6278   // Stack Frame of original AMD64 ABI document.
6279   QualType RetType = FD->getReturnType();
6280   if (RetType.isNull())
6281     return 0;
6282   ASTContext &C = FD->getASTContext();
6283   QualType CDT;
6284   if (!RetType.isNull() && !RetType->isVoidType())
6285     CDT = RetType;
6286   else {
6287     unsigned Offset = 0;
6288     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
6289       if (ParamAttrs[Offset].Kind == Vector)
6290         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
6291       ++Offset;
6292     }
6293     if (CDT.isNull()) {
6294       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
6295         if (ParamAttrs[I + Offset].Kind == Vector) {
6296           CDT = FD->getParamDecl(I)->getType();
6297           break;
6298         }
6299       }
6300     }
6301   }
6302   if (CDT.isNull())
6303     CDT = C.IntTy;
6304   CDT = CDT->getCanonicalTypeUnqualified();
6305   if (CDT->isRecordType() || CDT->isUnionType())
6306     CDT = C.IntTy;
6307   return C.getTypeSize(CDT);
6308 }
6309 
6310 static void
6311 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
6312                            llvm::APSInt VLENVal,
6313                            ArrayRef<ParamAttrTy> ParamAttrs,
6314                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
6315   struct ISADataTy {
6316     char ISA;
6317     unsigned VecRegSize;
6318   };
6319   ISADataTy ISAData[] = {
6320       {
6321           'b', 128
6322       }, // SSE
6323       {
6324           'c', 256
6325       }, // AVX
6326       {
6327           'd', 256
6328       }, // AVX2
6329       {
6330           'e', 512
6331       }, // AVX512
6332   };
6333   llvm::SmallVector<char, 2> Masked;
6334   switch (State) {
6335   case OMPDeclareSimdDeclAttr::BS_Undefined:
6336     Masked.push_back('N');
6337     Masked.push_back('M');
6338     break;
6339   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
6340     Masked.push_back('N');
6341     break;
6342   case OMPDeclareSimdDeclAttr::BS_Inbranch:
6343     Masked.push_back('M');
6344     break;
6345   }
6346   for (auto Mask : Masked) {
6347     for (auto &Data : ISAData) {
6348       SmallString<256> Buffer;
6349       llvm::raw_svector_ostream Out(Buffer);
6350       Out << "_ZGV" << Data.ISA << Mask;
6351       if (!VLENVal) {
6352         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
6353                                          evaluateCDTSize(FD, ParamAttrs));
6354       } else
6355         Out << VLENVal;
6356       for (auto &ParamAttr : ParamAttrs) {
6357         switch (ParamAttr.Kind){
6358         case LinearWithVarStride:
6359           Out << 's' << ParamAttr.StrideOrArg;
6360           break;
6361         case Linear:
6362           Out << 'l';
6363           if (!!ParamAttr.StrideOrArg)
6364             Out << ParamAttr.StrideOrArg;
6365           break;
6366         case Uniform:
6367           Out << 'u';
6368           break;
6369         case Vector:
6370           Out << 'v';
6371           break;
6372         }
6373         if (!!ParamAttr.Alignment)
6374           Out << 'a' << ParamAttr.Alignment;
6375       }
6376       Out << '_' << Fn->getName();
6377       Fn->addFnAttr(Out.str());
6378     }
6379   }
6380 }
6381 
6382 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
6383                                               llvm::Function *Fn) {
6384   ASTContext &C = CGM.getContext();
6385   FD = FD->getCanonicalDecl();
6386   // Map params to their positions in function decl.
6387   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
6388   if (isa<CXXMethodDecl>(FD))
6389     ParamPositions.insert({FD, 0});
6390   unsigned ParamPos = ParamPositions.size();
6391   for (auto *P : FD->params()) {
6392     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
6393     ++ParamPos;
6394   }
6395   for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
6396     llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
6397     // Mark uniform parameters.
6398     for (auto *E : Attr->uniforms()) {
6399       E = E->IgnoreParenImpCasts();
6400       unsigned Pos;
6401       if (isa<CXXThisExpr>(E))
6402         Pos = ParamPositions[FD];
6403       else {
6404         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6405                         ->getCanonicalDecl();
6406         Pos = ParamPositions[PVD];
6407       }
6408       ParamAttrs[Pos].Kind = Uniform;
6409     }
6410     // Get alignment info.
6411     auto NI = Attr->alignments_begin();
6412     for (auto *E : Attr->aligneds()) {
6413       E = E->IgnoreParenImpCasts();
6414       unsigned Pos;
6415       QualType ParmTy;
6416       if (isa<CXXThisExpr>(E)) {
6417         Pos = ParamPositions[FD];
6418         ParmTy = E->getType();
6419       } else {
6420         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6421                         ->getCanonicalDecl();
6422         Pos = ParamPositions[PVD];
6423         ParmTy = PVD->getType();
6424       }
6425       ParamAttrs[Pos].Alignment =
6426           (*NI) ? (*NI)->EvaluateKnownConstInt(C)
6427                 : llvm::APSInt::getUnsigned(
6428                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
6429                           .getQuantity());
6430       ++NI;
6431     }
6432     // Mark linear parameters.
6433     auto SI = Attr->steps_begin();
6434     auto MI = Attr->modifiers_begin();
6435     for (auto *E : Attr->linears()) {
6436       E = E->IgnoreParenImpCasts();
6437       unsigned Pos;
6438       if (isa<CXXThisExpr>(E))
6439         Pos = ParamPositions[FD];
6440       else {
6441         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6442                         ->getCanonicalDecl();
6443         Pos = ParamPositions[PVD];
6444       }
6445       auto &ParamAttr = ParamAttrs[Pos];
6446       ParamAttr.Kind = Linear;
6447       if (*SI) {
6448         if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
6449                                   Expr::SE_AllowSideEffects)) {
6450           if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
6451             if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
6452               ParamAttr.Kind = LinearWithVarStride;
6453               ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
6454                   ParamPositions[StridePVD->getCanonicalDecl()]);
6455             }
6456           }
6457         }
6458       }
6459       ++SI;
6460       ++MI;
6461     }
6462     llvm::APSInt VLENVal;
6463     if (const Expr *VLEN = Attr->getSimdlen())
6464       VLENVal = VLEN->EvaluateKnownConstInt(C);
6465     OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
6466     if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
6467         CGM.getTriple().getArch() == llvm::Triple::x86_64)
6468       emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
6469   }
6470 }
6471 
6472 namespace {
6473 /// Cleanup action for doacross support.
6474 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
6475 public:
6476   static const int DoacrossFinArgs = 2;
6477 
6478 private:
6479   llvm::Value *RTLFn;
6480   llvm::Value *Args[DoacrossFinArgs];
6481 
6482 public:
6483   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
6484       : RTLFn(RTLFn) {
6485     assert(CallArgs.size() == DoacrossFinArgs);
6486     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
6487   }
6488   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
6489     if (!CGF.HaveInsertPoint())
6490       return;
6491     CGF.EmitRuntimeCall(RTLFn, Args);
6492   }
6493 };
6494 } // namespace
6495 
6496 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
6497                                        const OMPLoopDirective &D) {
6498   if (!CGF.HaveInsertPoint())
6499     return;
6500 
6501   ASTContext &C = CGM.getContext();
6502   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
6503   RecordDecl *RD;
6504   if (KmpDimTy.isNull()) {
6505     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
6506     //  kmp_int64 lo; // lower
6507     //  kmp_int64 up; // upper
6508     //  kmp_int64 st; // stride
6509     // };
6510     RD = C.buildImplicitRecord("kmp_dim");
6511     RD->startDefinition();
6512     addFieldToRecordDecl(C, RD, Int64Ty);
6513     addFieldToRecordDecl(C, RD, Int64Ty);
6514     addFieldToRecordDecl(C, RD, Int64Ty);
6515     RD->completeDefinition();
6516     KmpDimTy = C.getRecordType(RD);
6517   } else
6518     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
6519 
6520   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
6521   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
6522   enum { LowerFD = 0, UpperFD, StrideFD };
6523   // Fill dims with data.
6524   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
6525   // dims.upper = num_iterations;
6526   LValue UpperLVal =
6527       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
6528   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
6529       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
6530       Int64Ty, D.getNumIterations()->getExprLoc());
6531   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
6532   // dims.stride = 1;
6533   LValue StrideLVal =
6534       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
6535   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
6536                         StrideLVal);
6537 
6538   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
6539   // kmp_int32 num_dims, struct kmp_dim * dims);
6540   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
6541                          getThreadID(CGF, D.getLocStart()),
6542                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
6543                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6544                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
6545 
6546   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
6547   CGF.EmitRuntimeCall(RTLFn, Args);
6548   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
6549       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
6550   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
6551   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
6552                                              llvm::makeArrayRef(FiniArgs));
6553 }
6554 
6555 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
6556                                           const OMPDependClause *C) {
6557   QualType Int64Ty =
6558       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
6559   const Expr *CounterVal = C->getCounterValue();
6560   assert(CounterVal);
6561   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
6562                                                  CounterVal->getType(), Int64Ty,
6563                                                  CounterVal->getExprLoc());
6564   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
6565   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
6566   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
6567                          getThreadID(CGF, C->getLocStart()),
6568                          CntAddr.getPointer()};
6569   llvm::Value *RTLFn;
6570   if (C->getDependencyKind() == OMPC_DEPEND_source)
6571     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
6572   else {
6573     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
6574     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
6575   }
6576   CGF.EmitRuntimeCall(RTLFn, Args);
6577 }
6578 
6579