1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info bound to the captured statement \p CS of an
  /// outlined construct.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used for inlined
  /// regions, which reuse the captures of the enclosing region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op by default,
  /// overridden for task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the kind of this region (outlined/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns true if this region was built with the HasCancel flag set.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region; subclasses' classof() dispatches on it.
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence emitted for the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that created this region.
  OpenMPDirectiveKind Kind;
  /// Whether a 'cancel' may occur in this region (set by the builder).
  bool HasCancel;
};
96 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable/parameter carrying the global thread id;
  /// must not be null.
  /// \param HelperName Name to use for the generated outlined helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated outlined helper function.
  StringRef HelperName;
};
129 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre-action for untied tasks: on entry it emits a dispatch switch over
  /// the task part id, and for every task-switching point it appends a new
  /// case that resumes execution at the corresponding continuation block.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: the constructor receives 'Tied').
    bool Untied;
    /// Parameter holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    /// Extra code emitted at every task-switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch over the part id; cases are appended lazily.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the part id and switch on it.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination: finish this invocation of the task entry.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the next part id, run
    /// \c UntiedCodeGen, leave the function, and register the continuation
    /// block as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Record the part id at which execution resumes next time; the
        // current case count doubles as the next part id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the untied-task action to emit a task-switching point.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique name for the target region, supplied by the
  /// caller. Target regions never carry a cancel flag.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region.
  StringRef HelperName;
};
330 
/// Codegen callback that must never actually be invoked; used as a
/// placeholder for region infos that exist only for capture analysis of
/// expressions (see CGOpenMPInnerExprInfo).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters need no privatization; only globals do.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    // Activate the privatizations collected above for the expression's scope.
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // No capture found in the enclosing region chain.
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
393 
/// RAII for emitting code of OpenMP constructs.
/// Installs a CGOpenMPInlinedRegionInfo as the current CapturedStmtInfo and
/// clears the enclosing function's lambda/block capture state for the
/// duration of the region; the destructor restores everything.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of the enclosing function.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved 'this' capture field of an enclosing lambda, if any.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info of an enclosing block literal, if any.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash away lambda/block capture state; restored in the destructor.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
430 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
459 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
500 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule is 'static' (unordered, unchunked).
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
532 
/// IDs for the OpenMP runtime library (libomp/libomptarget) entry points
/// that codegen may emit calls to; each comment gives the C prototype.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
723 
724 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
725 /// region.
726 class CleanupTy final : public EHScopeStack::Cleanup {
727   PrePostActionTy *Action;
728 
729 public:
730   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
731   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
732     if (!CGF.HaveInsertPoint())
733       return;
734     Action->Exit(CGF);
735   }
736 };
737 
738 } // anonymous namespace
739 
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  // Run the region's code-generation callback inside a fresh cleanups scope
  // so cleanups pushed while emitting the region are popped on exit.
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Register the action's Exit() as a normal-and-EH cleanup *before*
    // running the callback, so it fires when Scope unwinds.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No caller-provided action: supply a default (no-op) one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
750 
751 /// Check if the combiner is a call to UDR combiner and if it is so return the
752 /// UDR decl used for reduction.
753 static const OMPDeclareReductionDecl *
754 getReductionInit(const Expr *ReductionOp) {
755   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
756     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
757       if (const auto *DRE =
758               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
759         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
760           return DRD;
761   return nullptr;
762 }
763 
/// Emit initialization of a reduction private copy \p Private from the
/// original \p Original. If \p DRD has an explicit initializer clause, the
/// generated UDR initializer function is invoked through \p InitOp;
/// otherwise \p Private is filled with a zero/null pattern of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an initializer; fetch the (combiner, initializer) pair —
    // the initializer function is the second element.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Dig out the variables referenced by the call's two arguments so they
    // can be remapped onto the actual private/original storage.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a zero/null constant of type Ty in a
    // private constant global and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant according to the type's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Wrap the loaded value in an opaque expression and store it to Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
815 
/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the (private) destination array.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit True when elements must be initialized
/// with a user-defined reduction initializer instead of a plain expression.
/// \param Init Initialization expression applied to each element.
/// \param DRD Declare-reduction declaration, if any; when present the source
/// array is walked in lockstep with the destination.
/// \param SrcAddr Address of the original (shared) array; meaningful only
/// when \p DRD is non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer; only needed when a UDR initializer
  // reads the original array.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups so temporaries are destroyed at the end
    // of each iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" although this advances the
    // *source* pointer; cosmetic only — confirm against FileCheck tests
    // before renaming.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
904 
905 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
906   return CGF.EmitOMPSharedLValue(E);
907 }
908 
909 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
910                                             const Expr *E) {
911   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
912     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
913   return LValue();
914 }
915 
916 void ReductionCodeGen::emitAggregateInitialization(
917     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
918     const OMPDeclareReductionDecl *DRD) {
919   // Emit VarDecl with copy init for arrays.
920   // Get the address of the original variable captured in current
921   // captured region.
922   const auto *PrivateVD =
923       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
924   bool EmitDeclareReductionInit =
925       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
926   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
927                        EmitDeclareReductionInit,
928                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
929                                                 : PrivateVD->getInit(),
930                        DRD, SharedLVal.getAddress());
931 }
932 
933 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
934                                    ArrayRef<const Expr *> Privates,
935                                    ArrayRef<const Expr *> ReductionOps) {
936   ClausesData.reserve(Shareds.size());
937   SharedAddresses.reserve(Shareds.size());
938   Sizes.reserve(Shareds.size());
939   BaseDecls.reserve(Shareds.size());
940   auto IPriv = Privates.begin();
941   auto IRed = ReductionOps.begin();
942   for (const Expr *Ref : Shareds) {
943     ClausesData.emplace_back(Ref, *IPriv, *IRed);
944     std::advance(IPriv, 1);
945     std::advance(IRed, 1);
946   }
947 }
948 
949 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
950   assert(SharedAddresses.size() == N &&
951          "Number of generated lvalues must be exactly N.");
952   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
953   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
954   SharedAddresses.emplace_back(First, Second);
955 }
956 
/// Record (in Sizes) the size of the N-th reduction item and, for
/// variably-modified private types, emit the VLA size so the private type can
/// be materialized.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: byte size is known from the type; no runtime
    // element count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified item: byte size from the type, element count by
    // exact division.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA size expression before
  // emitting the variably-modified private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
994 
/// Variant of emitAggregateType that uses an externally provided element
/// count \p Size when emitting the variably-modified private type.
/// \p Size must be null exactly when the private type is not variably
/// modified.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Nothing to emit for constant-size types.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the provided element count to the VLA size expression before
  // emitting the variably-modified private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1013 
/// Emit initialization of the N-th private reduction copy from the shared
/// item, choosing between aggregate initialization, a user-defined reduction
/// initializer, or the private variable's own initializer.
/// \param DefaultInit Callback that may perform the default initialization
/// itself; returning true suppresses the fallback emission below.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a declare-reduction initializer (or without any default
    // initializer of its own).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1044 
1045 bool ReductionCodeGen::needCleanups(unsigned N) {
1046   const auto *PrivateVD =
1047       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1048   QualType PrivateType = PrivateVD->getType();
1049   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1050   return DTorKind != QualType::DK_none;
1051 }
1052 
1053 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1054                                     Address PrivateAddr) {
1055   const auto *PrivateVD =
1056       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1057   QualType PrivateType = PrivateVD->getType();
1058   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1059   if (needCleanups(N)) {
1060     PrivateAddr = CGF.Builder.CreateElementBitCast(
1061         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1062     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1063   }
1064 }
1065 
/// Follow pointer/reference indirections in \p BaseLV — loading through each
/// level — until the type reaches \p ElTy or stops being a pointer or
/// reference, then return the resulting storage as an lvalue cast to
/// \p ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference type: load through it to reach the referenced storage.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Recast the final storage to ElTy's memory representation, preserving the
  // original base/TBAA info.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1085 
/// Inverse of loadToBegin: wrap raw pointer \p Addr back into storage shaped
/// like \p BaseTy by allocating one temporary per pointer/reference level and
/// chaining them with stores.
/// \returns the outermost temporary when indirection was needed, otherwise
/// \p Addr (cast to \p BaseLVType) with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // previous level's temporary
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per level; each outer temporary stores the address of
    // the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1113 
1114 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1115   const VarDecl *OrigVD = nullptr;
1116   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1117     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1118     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1119       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1120     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1121       Base = TempASE->getBase()->IgnoreParenImpCasts();
1122     DE = cast<DeclRefExpr>(Base);
1123     OrigVD = cast<VarDecl>(DE->getDecl());
1124   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1125     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1126     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1127       Base = TempASE->getBase()->IgnoreParenImpCasts();
1128     DE = cast<DeclRefExpr>(Base);
1129     OrigVD = cast<VarDecl>(DE->getDecl());
1130   }
1131   return OrigVD;
1132 }
1133 
/// Adjust \p PrivateAddr so it points at the same relative position within
/// the private copy as the shared lvalue occupies within its base variable
/// (needed when the reduction item is an array section/subscript whose base
/// is offset from the variable's start).
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Emit the base variable and drill down to the item's element type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base start relative to the shared item.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    // Apply the same offset to the private copy and rebuild the base shape.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1159 
1160 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1161   const OMPDeclareReductionDecl *DRD =
1162       getReductionInit(ClausesData[N].ReductionOp);
1163   return DRD && DRD->getInitializer();
1164 }
1165 
1166 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1167   return CGF.EmitLoadOfPointerLValue(
1168       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1169       getThreadIDVariable()->getType()->castAs<PointerType>());
1170 }
1171 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Wrap the region body in a terminate scope: an exception escaping the
  // structured block terminates instead of unwinding past the region.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1184 
1185 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1186     CodeGenFunction &CGF) {
1187   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1188                             getThreadIDVariable()->getType(),
1189                             AlignmentSource::Decl);
1190 }
1191 
1192 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1193                                        QualType FieldTy) {
1194   auto *Field = FieldDecl::Create(
1195       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1196       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1197       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1198   Field->setAccess(AS_public);
1199   DC->addDecl(Field);
1200   return Field;
1201 }
1202 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the ident_t record type (four i32 fields plus a char* source
  // location string) used for location arguments to runtime entry points.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name is modeled as an array of 8 i32s.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1228 
1229 void CGOpenMPRuntime::clear() {
1230   InternalVars.clear();
1231   // Clean non-target variable declarations possibly used only in debug info.
1232   for (const auto &Data : EmittedNonTargetVariables) {
1233     if (!Data.getValue().pointsToAliveValue())
1234       continue;
1235     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1236     if (!GV)
1237       continue;
1238     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1239       continue;
1240     GV->eraseFromParent();
1241   }
1242 }
1243 
1244 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1245   SmallString<128> Buffer;
1246   llvm::raw_svector_ostream OS(Buffer);
1247   StringRef Sep = FirstSeparator;
1248   for (StringRef Part : Parts) {
1249     OS << Sep << Part;
1250     Sep = Separator;
1251   }
1252   return OS.str();
1253 }
1254 
/// Emit a helper function wrapping either the combiner or the initializer
/// expression of a user-defined reduction.
/// \param CombinerInitializer The expression to emit in the body; may be null
/// for an initializer emitted through \p Out's own init (direct-init form).
/// \param In / \param Out The omp_in/omp_out (or omp_orig/omp_priv)
/// variables, remapped onto the function's two pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Force these helpers to be inlinable even at -O0.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Direct-init initializer: CombinerInitializer is null and initialization
  // comes from Out's own (non-trivial) initializer instead.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1309 
/// Emit (at most once per declaration) the combiner and optional initializer
/// functions for a user-defined reduction, cache them in UDRMap, and record
/// the association with the current function when \p CGF is provided.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For non-call (direct) initialization, pass no expression: the emitted
    // function initializes through the priv variable's own init instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1335 
1336 std::pair<llvm::Function *, llvm::Function *>
1337 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1338   auto I = UDRMap.find(D);
1339   if (I != UDRMap.end())
1340     return I->second;
1341   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1342   return UDRMap.lookup(D);
1343 }
1344 
/// Outline the captured statement of a 'parallel' or 'teams' construct into
/// a function whose first parameter is the thread id (kmp_int32 *).
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this directive (or any of its combined forms) carries
  // a 'cancel'; each directive class declares hasCancel() separately, hence
  // the dyn_cast chain. The flag is threaded into the region info below.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  // Install the region info for the duration of the outlining.
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
1374 
1375 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1376     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1377     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1378   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1379   return emitParallelOrTeamsOutlinedFunction(
1380       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1381 }
1382 
1383 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1384     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1385     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1386   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1387   return emitParallelOrTeamsOutlinedFunction(
1388       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1389 }
1390 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks: after a part completes, call __kmpc_omp_task with the
  // task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-family directives capture their statement under OMPD_taskloop.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only a plain 'task' directive contributes a cancel flag here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // For untied tasks, report back how many parts the action recorded.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1427 
1428 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1429                              const RecordDecl *RD, const CGRecordLayout &RL,
1430                              ArrayRef<llvm::Constant *> Data) {
1431   llvm::StructType *StructTy = RL.getLLVMType();
1432   unsigned PrevIdx = 0;
1433   ConstantInitBuilder CIBuilder(CGM);
1434   auto DI = Data.begin();
1435   for (const FieldDecl *FD : RD->fields()) {
1436     unsigned Idx = RL.getLLVMFieldNo(FD);
1437     // Fill the alignment.
1438     for (unsigned I = PrevIdx; I < Idx; ++I)
1439       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1440     PrevIdx = Idx + 1;
1441     Fields.add(*DI);
1442     ++DI;
1443   }
1444 }
1445 
1446 template <class... As>
1447 static llvm::GlobalVariable *
1448 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1449                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1450                    As &&... Args) {
1451   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1452   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1453   ConstantInitBuilder CIBuilder(CGM);
1454   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1455   buildStructValue(Fields, CGM, RD, RL, Data);
1456   return Fields.finishAndCreateGlobal(
1457       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1458       std::forward<As>(Args)...);
1459 }
1460 
1461 template <typename T>
1462 static void
1463 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1464                                          ArrayRef<llvm::Constant *> Data,
1465                                          T &Parent) {
1466   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1467   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1468   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1469   buildStructValue(Fields, CGM, RD, RL, Data);
1470   Fields.finishAndAddTo(Parent);
1471 }
1472 
1473 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1474   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1475   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1476   FlagsTy FlagsKey(Flags, Reserved2Flags);
1477   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1478   if (!Entry) {
1479     if (!DefaultOpenMPPSource) {
1480       // Initialize default location for psource field of ident_t structure of
1481       // all ident_t objects. Format is ";file;function;line;column;;".
1482       // Taken from
1483       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1484       DefaultOpenMPPSource =
1485           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1486       DefaultOpenMPPSource =
1487           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1488     }
1489 
1490     llvm::Constant *Data[] = {
1491         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1492         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1493         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1494         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1495     llvm::GlobalValue *DefaultOpenMPLocation =
1496         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1497                            llvm::GlobalValue::PrivateLinkage);
1498     DefaultOpenMPLocation->setUnnamedAddr(
1499         llvm::GlobalValue::UnnamedAddr::Global);
1500 
1501     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1502   }
1503   return Address(Entry, Align);
1504 }
1505 
1506 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1507                                              bool AtCurrentPoint) {
1508   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1509   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1510 
1511   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1512   if (AtCurrentPoint) {
1513     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1514         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1515   } else {
1516     Elem.second.ServiceInsertPt =
1517         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1518     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1519   }
1520 }
1521 
1522 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1523   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1524   if (Elem.second.ServiceInsertPt) {
1525     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1526     Elem.second.ServiceInsertPt = nullptr;
1527     Ptr->eraseFromParent();
1528   }
1529 }
1530 
// Returns a pointer to an ident_t location descriptor for \p Loc, suitable
// for passing to libomp runtime entry points. Without debug info this is a
// shared constant global; with debug info a per-function stack copy is kept
// (cached in OpenMPLocThreadIDMap) and its psource field is updated to the
// ";file;function;line;column;;" string for \p Loc on every call.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  // Reuse the per-function ident_t temporary if one was already created.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the flag-specific default ident_t at the
    // function's service insertion point, so it happens once per function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Look up (or build and cache) the ";file;function;line;column;;" string
  // global for this source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1591 
// Returns the OpenMP global thread id (kmp_int32) for the current function.
// The value is taken, in order of preference, from: the per-function cache
// in OpenMPLocThreadIDMap; the thread-id variable of an enclosing outlined
// OpenMP region; or a __kmpc_global_thread_num call emitted at the
// function's service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point (near function entry) so
  // the cached value dominates all later uses.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1642 
1643 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1644   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1645   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1646     clearLocThreadIdInsertPt(CGF);
1647     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1648   }
1649   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1650     for(auto *D : FunctionUDRMap[CGF.CurFn])
1651       UDRMap.erase(D);
1652     FunctionUDRMap.erase(CGF.CurFn);
1653   }
1654 }
1655 
1656 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1657   return IdentTy->getPointerTo();
1658 }
1659 
1660 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1661   if (!Kmpc_MicroTy) {
1662     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1663     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1664                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1665     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1666   }
1667   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1668 }
1669 
1670 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1671   llvm::FunctionCallee RTLFn = nullptr;
1672   switch (static_cast<OpenMPRTLFunction>(Function)) {
1673   case OMPRTL__kmpc_fork_call: {
1674     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1675     // microtask, ...);
1676     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1677                                 getKmpc_MicroPointerTy()};
1678     auto *FnTy =
1679         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1680     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1681     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1682       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1683         llvm::LLVMContext &Ctx = F->getContext();
1684         llvm::MDBuilder MDB(Ctx);
1685         // Annotate the callback behavior of the __kmpc_fork_call:
1686         //  - The callback callee is argument number 2 (microtask).
1687         //  - The first two arguments of the callback callee are unknown (-1).
1688         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1689         //    callback callee.
1690         F->addMetadata(
1691             llvm::LLVMContext::MD_callback,
1692             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1693                                         2, {-1, -1},
1694                                         /* VarArgsArePassed */ true)}));
1695       }
1696     }
1697     break;
1698   }
1699   case OMPRTL__kmpc_global_thread_num: {
1700     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1701     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1702     auto *FnTy =
1703         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1704     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1705     break;
1706   }
1707   case OMPRTL__kmpc_threadprivate_cached: {
1708     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1709     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1710     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1711                                 CGM.VoidPtrTy, CGM.SizeTy,
1712                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1713     auto *FnTy =
1714         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1715     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1716     break;
1717   }
1718   case OMPRTL__kmpc_critical: {
1719     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1720     // kmp_critical_name *crit);
1721     llvm::Type *TypeParams[] = {
1722         getIdentTyPointerTy(), CGM.Int32Ty,
1723         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1724     auto *FnTy =
1725         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1726     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1727     break;
1728   }
1729   case OMPRTL__kmpc_critical_with_hint: {
1730     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1731     // kmp_critical_name *crit, uintptr_t hint);
1732     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1733                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1734                                 CGM.IntPtrTy};
1735     auto *FnTy =
1736         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1737     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1738     break;
1739   }
1740   case OMPRTL__kmpc_threadprivate_register: {
1741     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1742     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1743     // typedef void *(*kmpc_ctor)(void *);
1744     auto *KmpcCtorTy =
1745         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1746                                 /*isVarArg*/ false)->getPointerTo();
1747     // typedef void *(*kmpc_cctor)(void *, void *);
1748     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1749     auto *KmpcCopyCtorTy =
1750         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1751                                 /*isVarArg*/ false)
1752             ->getPointerTo();
1753     // typedef void (*kmpc_dtor)(void *);
1754     auto *KmpcDtorTy =
1755         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1756             ->getPointerTo();
1757     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1758                               KmpcCopyCtorTy, KmpcDtorTy};
1759     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1760                                         /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1762     break;
1763   }
1764   case OMPRTL__kmpc_end_critical: {
1765     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit);
1767     llvm::Type *TypeParams[] = {
1768         getIdentTyPointerTy(), CGM.Int32Ty,
1769         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1773     break;
1774   }
1775   case OMPRTL__kmpc_cancel_barrier: {
1776     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1777     // global_tid);
1778     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1779     auto *FnTy =
1780         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1781     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1782     break;
1783   }
1784   case OMPRTL__kmpc_barrier: {
1785     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1786     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1787     auto *FnTy =
1788         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1789     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1790     break;
1791   }
1792   case OMPRTL__kmpc_for_static_fini: {
1793     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1794     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1795     auto *FnTy =
1796         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1797     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1798     break;
1799   }
1800   case OMPRTL__kmpc_push_num_threads: {
1801     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1802     // kmp_int32 num_threads)
1803     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1804                                 CGM.Int32Ty};
1805     auto *FnTy =
1806         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1808     break;
1809   }
1810   case OMPRTL__kmpc_serialized_parallel: {
1811     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1812     // global_tid);
1813     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814     auto *FnTy =
1815         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1816     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1817     break;
1818   }
1819   case OMPRTL__kmpc_end_serialized_parallel: {
1820     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1821     // global_tid);
1822     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1823     auto *FnTy =
1824         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1825     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1826     break;
1827   }
1828   case OMPRTL__kmpc_flush: {
1829     // Build void __kmpc_flush(ident_t *loc);
1830     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1831     auto *FnTy =
1832         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1833     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1834     break;
1835   }
1836   case OMPRTL__kmpc_master: {
1837     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1838     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1839     auto *FnTy =
1840         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1841     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1842     break;
1843   }
1844   case OMPRTL__kmpc_end_master: {
1845     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1846     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1847     auto *FnTy =
1848         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1849     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1850     break;
1851   }
1852   case OMPRTL__kmpc_omp_taskyield: {
1853     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1854     // int end_part);
1855     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1856     auto *FnTy =
1857         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1858     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1859     break;
1860   }
1861   case OMPRTL__kmpc_single: {
1862     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1863     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1864     auto *FnTy =
1865         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1866     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1867     break;
1868   }
1869   case OMPRTL__kmpc_end_single: {
1870     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1871     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1872     auto *FnTy =
1873         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1874     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1875     break;
1876   }
1877   case OMPRTL__kmpc_omp_task_alloc: {
1878     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1879     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1880     // kmp_routine_entry_t *task_entry);
1881     assert(KmpRoutineEntryPtrTy != nullptr &&
1882            "Type kmp_routine_entry_t must be created.");
1883     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1884                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1885     // Return void * and then cast to particular kmp_task_t type.
1886     auto *FnTy =
1887         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1888     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1889     break;
1890   }
1891   case OMPRTL__kmpc_omp_task: {
1892     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1893     // *new_task);
1894     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1895                                 CGM.VoidPtrTy};
1896     auto *FnTy =
1897         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1898     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1899     break;
1900   }
1901   case OMPRTL__kmpc_copyprivate: {
1902     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1903     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1904     // kmp_int32 didit);
1905     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1906     auto *CpyFnTy =
1907         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1908     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1909                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1910                                 CGM.Int32Ty};
1911     auto *FnTy =
1912         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1913     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1914     break;
1915   }
1916   case OMPRTL__kmpc_reduce: {
1917     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1918     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1919     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1920     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1921     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1922                                                /*isVarArg=*/false);
1923     llvm::Type *TypeParams[] = {
1924         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1925         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1926         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1927     auto *FnTy =
1928         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1929     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1930     break;
1931   }
1932   case OMPRTL__kmpc_reduce_nowait: {
1933     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1934     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1935     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1936     // *lck);
1937     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1938     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1939                                                /*isVarArg=*/false);
1940     llvm::Type *TypeParams[] = {
1941         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1942         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1943         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1944     auto *FnTy =
1945         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1946     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1947     break;
1948   }
1949   case OMPRTL__kmpc_end_reduce: {
1950     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1951     // kmp_critical_name *lck);
1952     llvm::Type *TypeParams[] = {
1953         getIdentTyPointerTy(), CGM.Int32Ty,
1954         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1955     auto *FnTy =
1956         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1957     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1958     break;
1959   }
1960   case OMPRTL__kmpc_end_reduce_nowait: {
1961     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1962     // kmp_critical_name *lck);
1963     llvm::Type *TypeParams[] = {
1964         getIdentTyPointerTy(), CGM.Int32Ty,
1965         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1966     auto *FnTy =
1967         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1968     RTLFn =
1969         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1970     break;
1971   }
1972   case OMPRTL__kmpc_omp_task_begin_if0: {
1973     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1974     // *new_task);
1975     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1976                                 CGM.VoidPtrTy};
1977     auto *FnTy =
1978         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1979     RTLFn =
1980         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1981     break;
1982   }
1983   case OMPRTL__kmpc_omp_task_complete_if0: {
1984     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1985     // *new_task);
1986     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1987                                 CGM.VoidPtrTy};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1991                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1992     break;
1993   }
1994   case OMPRTL__kmpc_ordered: {
1995     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1996     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1997     auto *FnTy =
1998         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1999     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2000     break;
2001   }
2002   case OMPRTL__kmpc_end_ordered: {
2003     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2004     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2008     break;
2009   }
2010   case OMPRTL__kmpc_omp_taskwait: {
2011     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2012     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2013     auto *FnTy =
2014         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2015     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2016     break;
2017   }
2018   case OMPRTL__kmpc_taskgroup: {
2019     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2020     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2021     auto *FnTy =
2022         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2023     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2024     break;
2025   }
2026   case OMPRTL__kmpc_end_taskgroup: {
2027     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2029     auto *FnTy =
2030         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2031     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2032     break;
2033   }
2034   case OMPRTL__kmpc_push_proc_bind: {
2035     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2036     // int proc_bind)
2037     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2040     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2041     break;
2042   }
2043   case OMPRTL__kmpc_omp_task_with_deps: {
2044     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2045     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2046     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2047     llvm::Type *TypeParams[] = {
2048         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2049         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2050     auto *FnTy =
2051         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2052     RTLFn =
2053         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2054     break;
2055   }
2056   case OMPRTL__kmpc_omp_wait_deps: {
2057     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2058     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2059     // kmp_depend_info_t *noalias_dep_list);
2060     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2061                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2062                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2063     auto *FnTy =
2064         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2065     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2066     break;
2067   }
2068   case OMPRTL__kmpc_cancellationpoint: {
2069     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2070     // global_tid, kmp_int32 cncl_kind)
2071     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2072     auto *FnTy =
2073         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2074     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2075     break;
2076   }
2077   case OMPRTL__kmpc_cancel: {
2078     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2079     // kmp_int32 cncl_kind)
2080     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2081     auto *FnTy =
2082         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2083     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2084     break;
2085   }
2086   case OMPRTL__kmpc_push_num_teams: {
2087     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2088     // kmp_int32 num_teams, kmp_int32 num_threads)
2089     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2090         CGM.Int32Ty};
2091     auto *FnTy =
2092         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2093     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2094     break;
2095   }
2096   case OMPRTL__kmpc_fork_teams: {
2097     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2098     // microtask, ...);
2099     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2100                                 getKmpc_MicroPointerTy()};
2101     auto *FnTy =
2102         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2103     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2104     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2105       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2106         llvm::LLVMContext &Ctx = F->getContext();
2107         llvm::MDBuilder MDB(Ctx);
2108         // Annotate the callback behavior of the __kmpc_fork_teams:
2109         //  - The callback callee is argument number 2 (microtask).
2110         //  - The first two arguments of the callback callee are unknown (-1).
2111         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2112         //    callback callee.
2113         F->addMetadata(
2114             llvm::LLVMContext::MD_callback,
2115             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2116                                         2, {-1, -1},
2117                                         /* VarArgsArePassed */ true)}));
2118       }
2119     }
2120     break;
2121   }
2122   case OMPRTL__kmpc_taskloop: {
2123     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2124     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2125     // sched, kmp_uint64 grainsize, void *task_dup);
2126     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2127                                 CGM.IntTy,
2128                                 CGM.VoidPtrTy,
2129                                 CGM.IntTy,
2130                                 CGM.Int64Ty->getPointerTo(),
2131                                 CGM.Int64Ty->getPointerTo(),
2132                                 CGM.Int64Ty,
2133                                 CGM.IntTy,
2134                                 CGM.IntTy,
2135                                 CGM.Int64Ty,
2136                                 CGM.VoidPtrTy};
2137     auto *FnTy =
2138         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2139     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2140     break;
2141   }
2142   case OMPRTL__kmpc_doacross_init: {
2143     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2144     // num_dims, struct kmp_dim *dims);
2145     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2146                                 CGM.Int32Ty,
2147                                 CGM.Int32Ty,
2148                                 CGM.VoidPtrTy};
2149     auto *FnTy =
2150         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2151     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2152     break;
2153   }
2154   case OMPRTL__kmpc_doacross_fini: {
2155     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2156     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2157     auto *FnTy =
2158         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2159     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2160     break;
2161   }
2162   case OMPRTL__kmpc_doacross_post: {
2163     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2164     // *vec);
2165     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2166                                 CGM.Int64Ty->getPointerTo()};
2167     auto *FnTy =
2168         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2169     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2170     break;
2171   }
2172   case OMPRTL__kmpc_doacross_wait: {
2173     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2174     // *vec);
2175     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2176                                 CGM.Int64Ty->getPointerTo()};
2177     auto *FnTy =
2178         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2179     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2180     break;
2181   }
2182   case OMPRTL__kmpc_task_reduction_init: {
2183     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2184     // *data);
2185     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2186     auto *FnTy =
2187         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2188     RTLFn =
2189         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2190     break;
2191   }
2192   case OMPRTL__kmpc_task_reduction_get_th_data: {
2193     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2194     // *d);
2195     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2196     auto *FnTy =
2197         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2198     RTLFn = CGM.CreateRuntimeFunction(
2199         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2200     break;
2201   }
2202   case OMPRTL__kmpc_alloc: {
2203     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2204     // al); omp_allocator_handle_t type is void *.
2205     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2206     auto *FnTy =
2207         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2208     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2209     break;
2210   }
2211   case OMPRTL__kmpc_free: {
2212     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2213     // al); omp_allocator_handle_t type is void *.
2214     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2215     auto *FnTy =
2216         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2217     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2218     break;
2219   }
2220   case OMPRTL__kmpc_push_target_tripcount: {
2221     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2222     // size);
2223     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2224     llvm::FunctionType *FnTy =
2225         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2226     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2227     break;
2228   }
2229   case OMPRTL__tgt_target: {
2230     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2231     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2232     // *arg_types);
2233     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2234                                 CGM.VoidPtrTy,
2235                                 CGM.Int32Ty,
2236                                 CGM.VoidPtrPtrTy,
2237                                 CGM.VoidPtrPtrTy,
2238                                 CGM.SizeTy->getPointerTo(),
2239                                 CGM.Int64Ty->getPointerTo()};
2240     auto *FnTy =
2241         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2242     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2243     break;
2244   }
2245   case OMPRTL__tgt_target_nowait: {
2246     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2247     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2248     // int64_t *arg_types);
2249     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2250                                 CGM.VoidPtrTy,
2251                                 CGM.Int32Ty,
2252                                 CGM.VoidPtrPtrTy,
2253                                 CGM.VoidPtrPtrTy,
2254                                 CGM.SizeTy->getPointerTo(),
2255                                 CGM.Int64Ty->getPointerTo()};
2256     auto *FnTy =
2257         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2258     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2259     break;
2260   }
2261   case OMPRTL__tgt_target_teams: {
2262     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2263     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2264     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2265     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2266                                 CGM.VoidPtrTy,
2267                                 CGM.Int32Ty,
2268                                 CGM.VoidPtrPtrTy,
2269                                 CGM.VoidPtrPtrTy,
2270                                 CGM.SizeTy->getPointerTo(),
2271                                 CGM.Int64Ty->getPointerTo(),
2272                                 CGM.Int32Ty,
2273                                 CGM.Int32Ty};
2274     auto *FnTy =
2275         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2276     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2277     break;
2278   }
2279   case OMPRTL__tgt_target_teams_nowait: {
2280     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2281     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2282     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2283     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284                                 CGM.VoidPtrTy,
2285                                 CGM.Int32Ty,
2286                                 CGM.VoidPtrPtrTy,
2287                                 CGM.VoidPtrPtrTy,
2288                                 CGM.SizeTy->getPointerTo(),
2289                                 CGM.Int64Ty->getPointerTo(),
2290                                 CGM.Int32Ty,
2291                                 CGM.Int32Ty};
2292     auto *FnTy =
2293         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2294     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2295     break;
2296   }
2297   case OMPRTL__tgt_register_lib: {
2298     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2299     QualType ParamTy =
2300         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2301     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2302     auto *FnTy =
2303         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2304     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2305     break;
2306   }
2307   case OMPRTL__tgt_unregister_lib: {
2308     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2309     QualType ParamTy =
2310         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2311     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2312     auto *FnTy =
2313         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2314     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2315     break;
2316   }
2317   case OMPRTL__tgt_target_data_begin: {
2318     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2319     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2320     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2321                                 CGM.Int32Ty,
2322                                 CGM.VoidPtrPtrTy,
2323                                 CGM.VoidPtrPtrTy,
2324                                 CGM.SizeTy->getPointerTo(),
2325                                 CGM.Int64Ty->getPointerTo()};
2326     auto *FnTy =
2327         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2328     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2329     break;
2330   }
2331   case OMPRTL__tgt_target_data_begin_nowait: {
2332     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2333     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2334     // *arg_types);
2335     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2336                                 CGM.Int32Ty,
2337                                 CGM.VoidPtrPtrTy,
2338                                 CGM.VoidPtrPtrTy,
2339                                 CGM.SizeTy->getPointerTo(),
2340                                 CGM.Int64Ty->getPointerTo()};
2341     auto *FnTy =
2342         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2343     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2344     break;
2345   }
2346   case OMPRTL__tgt_target_data_end: {
2347     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2348     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2349     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2350                                 CGM.Int32Ty,
2351                                 CGM.VoidPtrPtrTy,
2352                                 CGM.VoidPtrPtrTy,
2353                                 CGM.SizeTy->getPointerTo(),
2354                                 CGM.Int64Ty->getPointerTo()};
2355     auto *FnTy =
2356         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2357     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2358     break;
2359   }
2360   case OMPRTL__tgt_target_data_end_nowait: {
2361     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2362     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2363     // *arg_types);
2364     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2365                                 CGM.Int32Ty,
2366                                 CGM.VoidPtrPtrTy,
2367                                 CGM.VoidPtrPtrTy,
2368                                 CGM.SizeTy->getPointerTo(),
2369                                 CGM.Int64Ty->getPointerTo()};
2370     auto *FnTy =
2371         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2372     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2373     break;
2374   }
2375   case OMPRTL__tgt_target_data_update: {
2376     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2377     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2378     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379                                 CGM.Int32Ty,
2380                                 CGM.VoidPtrPtrTy,
2381                                 CGM.VoidPtrPtrTy,
2382                                 CGM.SizeTy->getPointerTo(),
2383                                 CGM.Int64Ty->getPointerTo()};
2384     auto *FnTy =
2385         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2387     break;
2388   }
2389   case OMPRTL__tgt_target_data_update_nowait: {
2390     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2391     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2392     // *arg_types);
2393     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394                                 CGM.Int32Ty,
2395                                 CGM.VoidPtrPtrTy,
2396                                 CGM.VoidPtrPtrTy,
2397                                 CGM.SizeTy->getPointerTo(),
2398                                 CGM.Int64Ty->getPointerTo()};
2399     auto *FnTy =
2400         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2402     break;
2403   }
2404   }
2405   assert(RTLFn && "Unable to find OpenMP runtime function");
2406   return RTLFn;
2407 }
2408 
2409 llvm::FunctionCallee
2410 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2411   assert((IVSize == 32 || IVSize == 64) &&
2412          "IV size is not compatible with the omp runtime");
2413   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2414                                             : "__kmpc_for_static_init_4u")
2415                                 : (IVSigned ? "__kmpc_for_static_init_8"
2416                                             : "__kmpc_for_static_init_8u");
2417   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2418   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2419   llvm::Type *TypeParams[] = {
2420     getIdentTyPointerTy(),                     // loc
2421     CGM.Int32Ty,                               // tid
2422     CGM.Int32Ty,                               // schedtype
2423     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2424     PtrTy,                                     // p_lower
2425     PtrTy,                                     // p_upper
2426     PtrTy,                                     // p_stride
2427     ITy,                                       // incr
2428     ITy                                        // chunk
2429   };
2430   auto *FnTy =
2431       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2432   return CGM.CreateRuntimeFunction(FnTy, Name);
2433 }
2434 
2435 llvm::FunctionCallee
2436 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2437   assert((IVSize == 32 || IVSize == 64) &&
2438          "IV size is not compatible with the omp runtime");
2439   StringRef Name =
2440       IVSize == 32
2441           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2442           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2443   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2444   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2445                                CGM.Int32Ty,           // tid
2446                                CGM.Int32Ty,           // schedtype
2447                                ITy,                   // lower
2448                                ITy,                   // upper
2449                                ITy,                   // stride
2450                                ITy                    // chunk
2451   };
2452   auto *FnTy =
2453       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2454   return CGM.CreateRuntimeFunction(FnTy, Name);
2455 }
2456 
2457 llvm::FunctionCallee
2458 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2459   assert((IVSize == 32 || IVSize == 64) &&
2460          "IV size is not compatible with the omp runtime");
2461   StringRef Name =
2462       IVSize == 32
2463           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2464           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2465   llvm::Type *TypeParams[] = {
2466       getIdentTyPointerTy(), // loc
2467       CGM.Int32Ty,           // tid
2468   };
2469   auto *FnTy =
2470       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2471   return CGM.CreateRuntimeFunction(FnTy, Name);
2472 }
2473 
2474 llvm::FunctionCallee
2475 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2476   assert((IVSize == 32 || IVSize == 64) &&
2477          "IV size is not compatible with the omp runtime");
2478   StringRef Name =
2479       IVSize == 32
2480           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2481           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2482   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2483   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2484   llvm::Type *TypeParams[] = {
2485     getIdentTyPointerTy(),                     // loc
2486     CGM.Int32Ty,                               // tid
2487     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2488     PtrTy,                                     // p_lower
2489     PtrTy,                                     // p_upper
2490     PtrTy                                      // p_stride
2491   };
2492   auto *FnTy =
2493       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2494   return CGM.CreateRuntimeFunction(FnTy, Name);
2495 }
2496 
2497 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2498   if (CGM.getLangOpts().OpenMPSimd)
2499     return Address::invalid();
2500   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2501       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2502   if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2503     SmallString<64> PtrName;
2504     {
2505       llvm::raw_svector_ostream OS(PtrName);
2506       OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2507     }
2508     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2509     if (!Ptr) {
2510       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2511       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2512                                         PtrName);
2513       if (!CGM.getLangOpts().OpenMPIsDevice) {
2514         auto *GV = cast<llvm::GlobalVariable>(Ptr);
2515         GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2516         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2517       }
2518       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2519       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2520     }
2521     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2522   }
2523   return Address::invalid();
2524 }
2525 
2526 llvm::Constant *
2527 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2528   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2529          !CGM.getContext().getTargetInfo().isTLSSupported());
2530   // Lookup the entry, lazily creating it if necessary.
2531   std::string Suffix = getName({"cache", ""});
2532   return getOrCreateInternalVariable(
2533       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2534 }
2535 
2536 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2537                                                 const VarDecl *VD,
2538                                                 Address VDAddr,
2539                                                 SourceLocation Loc) {
2540   if (CGM.getLangOpts().OpenMPUseTLS &&
2541       CGM.getContext().getTargetInfo().isTLSSupported())
2542     return VDAddr;
2543 
2544   llvm::Type *VarTy = VDAddr.getElementType();
2545   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2546                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2547                                                        CGM.Int8PtrTy),
2548                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2549                          getOrCreateThreadPrivateCache(VD)};
2550   return Address(CGF.EmitRuntimeCall(
2551       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2552                  VDAddr.getAlignment());
2553 }
2554 
2555 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2556     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2557     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2558   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2559   // library.
2560   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2561   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2562                       OMPLoc);
2563   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2564   // to register constructor/destructor for variable.
2565   llvm::Value *Args[] = {
2566       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2567       Ctor, CopyCtor, Dtor};
2568   CGF.EmitRuntimeCall(
2569       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2570 }
2571 
/// Emit, if required, the code that registers per-thread constructor and
/// destructor functions for a 'threadprivate' variable definition with the
/// OpenMP runtime. Returns a synthesized standalone initializer function when
/// \p CGF is null and registration is needed; otherwise returns nullptr
/// (registration emitted inline into \p CGF, or nothing to do).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when the variable is implemented via native TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Register only for the variable's definition, and only once per mangled
  // name in this module.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The runtime passes the address of the thread's copy as a 'void *'.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the incoming 'void *' argument and reinterpret it as a pointer
      // to the variable's type before running the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The generated ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The runtime passes the address of the thread's copy as a 'void *'.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      // NL must be in scope across StartFunction: suppress the debug location
      // on the function prologue.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No per-thread initialization needed: pass a typed null ctor pointer.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No per-thread destruction needed: pass a typed null dtor pointer.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a standalone initializer function
      // that performs the registration, and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2691 
2692 /// Obtain information that uniquely identifies a target entry. This
2693 /// consists of the file and device IDs as well as line number associated with
2694 /// the relevant entry source location.
2695 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2696                                      unsigned &DeviceID, unsigned &FileID,
2697                                      unsigned &LineNum) {
2698   SourceManager &SM = C.getSourceManager();
2699 
2700   // The loc should be always valid and have a file ID (the user cannot use
2701   // #pragma directives in macros)
2702 
2703   assert(Loc.isValid() && "Source location is expected to be always valid.");
2704 
2705   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2706   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2707 
2708   llvm::sys::fs::UniqueID ID;
2709   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2710     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2711         << PLoc.getFilename() << EC.message();
2712 
2713   DeviceID = ID.getDevice();
2714   FileID = ID.getFile();
2715   LineNum = PLoc.getLine();
2716 }
2717 
/// Emit the offload-entry bookkeeping for the definition of a variable
/// marked "declare target".  For C++ variables needing initialization a
/// "<prefix>_ctor" entry is registered (on the device a real function that
/// runs the initializer, on the host a placeholder global); likewise a
/// "<prefix>_dtor" entry when the type has a non-trivial destructor.
/// NOTE(review): every exit path returns CGM.getLangOpts().OpenMPIsDevice —
/// callers appear to use the result as "handled for device"; confirm at the
/// call sites.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Skip variables that are not declare-target or are mapped with 'link'.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return CGM.getLangOpts().OpenMPIsDevice;
  // Emit the entries at most once per mangled definition.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // No debug location for the prologue; give the body an artificial one.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private placeholder byte serves as both the entry
      // address and its unique ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2825 
2826 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2827                                                           QualType VarType,
2828                                                           StringRef Name) {
2829   std::string Suffix = getName({"artificial", ""});
2830   std::string CacheSuffix = getName({"cache", ""});
2831   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2832   llvm::Value *GAddr =
2833       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2834   llvm::Value *Args[] = {
2835       emitUpdateLocation(CGF, SourceLocation()),
2836       getThreadID(CGF, SourceLocation()),
2837       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2838       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2839                                 /*IsSigned=*/false),
2840       getOrCreateInternalVariable(
2841           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2842   return Address(
2843       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2844           CGF.EmitRuntimeCall(
2845               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2846           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2847       CGM.getPointerAlign());
2848 }
2849 
2850 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2851                                       const RegionCodeGenTy &ThenGen,
2852                                       const RegionCodeGenTy &ElseGen) {
2853   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2854 
2855   // If the condition constant folds and can be elided, try to avoid emitting
2856   // the condition and the dead arm of the if/else.
2857   bool CondConstant;
2858   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2859     if (CondConstant)
2860       ThenGen(CGF);
2861     else
2862       ElseGen(CGF);
2863     return;
2864   }
2865 
2866   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2867   // emit the conditional branch.
2868   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2869   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2870   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2871   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2872 
2873   // Emit the 'then' code.
2874   CGF.EmitBlock(ThenBlock);
2875   ThenGen(CGF);
2876   CGF.EmitBranch(ContBlock);
2877   // Emit the 'else' code if present.
2878   // There is no need to emit line number for unconditional branch.
2879   (void)ApplyDebugLocation::CreateEmpty(CGF);
2880   CGF.EmitBlock(ElseBlock);
2881   ElseGen(CGF);
2882   // There is no need to emit line number for unconditional branch.
2883   (void)ApplyDebugLocation::CreateEmpty(CGF);
2884   CGF.EmitBranch(ContBlock);
2885   // Emit the continuation block for code after the if.
2886   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2887 }
2888 
/// Emit code for a 'parallel' call of \p OutlinedFn: a __kmpc_fork_call in
/// the parallel case, or — when \p IfCond is present and false at runtime —
/// a direct invocation bracketed by __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: hand the outlined microtask and all captured variables to
  // the runtime, which forks the team.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an if-clause, pick a path at runtime (or fold it away); otherwise
  // emit the parallel path unconditionally.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2946 
2947 // If we're inside an (outlined) parallel region, use the region info's
2948 // thread-ID variable (it is passed in a first argument of the outlined function
2949 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2950 // regular serial code region, get thread ID by calling kmp_int32
2951 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2952 // return the address of that temp.
2953 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2954                                              SourceLocation Loc) {
2955   if (auto *OMPRegionInfo =
2956           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2957     if (OMPRegionInfo->getThreadIDVariable())
2958       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2959 
2960   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2961   QualType Int32Ty =
2962       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2963   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2964   CGF.EmitStoreOfScalar(ThreadID,
2965                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2966 
2967   return ThreadIDTemp;
2968 }
2969 
2970 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2971     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2972   SmallString<256> Buffer;
2973   llvm::raw_svector_ostream Out(Buffer);
2974   Out << Name;
2975   StringRef RuntimeName = Out.str();
2976   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2977   if (Elem.second) {
2978     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2979            "OMP internal variable has different type than requested");
2980     return &*Elem.second;
2981   }
2982 
2983   return Elem.second = new llvm::GlobalVariable(
2984              CGM.getModule(), Ty, /*IsConstant*/ false,
2985              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2986              Elem.first(), /*InsertBefore=*/nullptr,
2987              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2988 }
2989 
2990 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2991   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2992   std::string Name = getName({Prefix, "var"});
2993   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2994 }
2995 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits a runtime "enter" call before the region and an "exit" call after
/// it.  When constructed with \p Conditional set, the region is guarded on
/// the enter call returning non-zero (the __kmpc_master/__kmpc_single
/// pattern) and the client must call Done() after emitting the region to
/// close the guard.
class CommonActionTy final : public PrePostActionTy {
  // Runtime function called on region entry.
  llvm::FunctionCallee EnterCallee;
  // Arguments for the enter call; not owned, must outlive this action.
  ArrayRef<llvm::Value *> EnterArgs;
  // Runtime function called on region exit.
  llvm::FunctionCallee ExitCallee;
  // Arguments for the exit call; not owned, must outlive this action.
  ArrayRef<llvm::Value *> ExitArgs;
  // Whether the region is guarded by the enter call's result.
  bool Conditional;
  // Continuation block of the guard; only set in conditional mode.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // if (<enter-call result> != 0) { <region> ...
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional region opened by Enter().  Only meaningful when
  /// the action was created with Conditional=true (ContBlock is null
  /// otherwise; clients only call Done() in the conditional case).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
3034 
3035 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3036                                          StringRef CriticalName,
3037                                          const RegionCodeGenTy &CriticalOpGen,
3038                                          SourceLocation Loc, const Expr *Hint) {
3039   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3040   // CriticalOpGen();
3041   // __kmpc_end_critical(ident_t *, gtid, Lock);
3042   // Prepare arguments and build a call to __kmpc_critical
3043   if (!CGF.HaveInsertPoint())
3044     return;
3045   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3046                          getCriticalRegionLock(CriticalName)};
3047   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3048                                                 std::end(Args));
3049   if (Hint) {
3050     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3051         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3052   }
3053   CommonActionTy Action(
3054       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3055                                  : OMPRTL__kmpc_critical),
3056       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3057   CriticalOpGen.setAction(Action);
3058   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3059 }
3060 
3061 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3062                                        const RegionCodeGenTy &MasterOpGen,
3063                                        SourceLocation Loc) {
3064   if (!CGF.HaveInsertPoint())
3065     return;
3066   // if(__kmpc_master(ident_t *, gtid)) {
3067   //   MasterOpGen();
3068   //   __kmpc_end_master(ident_t *, gtid);
3069   // }
3070   // Prepare arguments and build a call to __kmpc_master
3071   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3072   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3073                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3074                         /*Conditional=*/true);
3075   MasterOpGen.setAction(Action);
3076   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3077   Action.Done(CGF);
3078 }
3079 
3080 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3081                                         SourceLocation Loc) {
3082   if (!CGF.HaveInsertPoint())
3083     return;
3084   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3085   llvm::Value *Args[] = {
3086       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3087       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3088   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3089   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3090     Region->emitUntiedSwitch(CGF);
3091 }
3092 
3093 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3094                                           const RegionCodeGenTy &TaskgroupOpGen,
3095                                           SourceLocation Loc) {
3096   if (!CGF.HaveInsertPoint())
3097     return;
3098   // __kmpc_taskgroup(ident_t *, gtid);
3099   // TaskgroupOpGen();
3100   // __kmpc_end_taskgroup(ident_t *, gtid);
3101   // Prepare arguments and build a call to __kmpc_taskgroup
3102   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3103   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3104                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3105                         Args);
3106   TaskgroupOpGen.setAction(Action);
3107   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3108 }
3109 
3110 /// Given an array of pointers to variables, project the address of a
3111 /// given variable.
3112 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3113                                       unsigned Index, const VarDecl *Var) {
3114   // Pull out the pointer to the variable.
3115   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3116   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3117 
3118   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3119   Addr = CGF.Builder.CreateElementBitCast(
3120       Addr, CGF.ConvertTypeForMem(Var->getType()));
3121   return Addr;
3122 }
3123 
/// Emit the "void copy_func(void *LHSArg, void *RHSArg)" helper passed to
/// __kmpc_copyprivate.  Both arguments are arrays of void* with one slot
/// per copyprivate variable; the helper performs the per-variable
/// assignments described by \p AssignmentOps.
/// NOTE(review): the caller (emitSingleRegion) passes its SrcExprs as
/// \p DestExprs and its DstExprs as \p SrcExprs; the pseudo-variables act
/// as placeholders inside \p AssignmentOps — confirm the pairing against
/// EmitOMPCopy before renaming anything here.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced via the
  // __kmpc_copyprivate call emitted by the caller.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The element type comes from the copyprivate variable itself.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3177 
/// Emit a 'single' region.  With copyprivate variables, a did_it flag
/// records which thread executed the region and a __kmpc_copyprivate call
/// broadcasts that thread's values to the rest of the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate lists are parallel arrays.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall shape of the emitted code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is only needed when there are copyprivate variables.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.  The region body is
  // guarded on __kmpc_single returning non-zero (Conditional mode).
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded block: only the executing
    // thread sets the flag)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill each slot with the (type-erased) address of one variable.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3258 
3259 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3260                                         const RegionCodeGenTy &OrderedOpGen,
3261                                         SourceLocation Loc, bool IsThreads) {
3262   if (!CGF.HaveInsertPoint())
3263     return;
3264   // __kmpc_ordered(ident_t *, gtid);
3265   // OrderedOpGen();
3266   // __kmpc_end_ordered(ident_t *, gtid);
3267   // Prepare arguments and build a call to __kmpc_ordered
3268   if (IsThreads) {
3269     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3270     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3271                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3272                           Args);
3273     OrderedOpGen.setAction(Action);
3274     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3275     return;
3276   }
3277   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3278 }
3279 
3280 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3281   unsigned Flags;
3282   if (Kind == OMPD_for)
3283     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3284   else if (Kind == OMPD_sections)
3285     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3286   else if (Kind == OMPD_single)
3287     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3288   else if (Kind == OMPD_barrier)
3289     Flags = OMP_IDENT_BARRIER_EXPL;
3290   else
3291     Flags = OMP_IDENT_BARRIER_IMPL;
3292   return Flags;
3293 }
3294 
3295 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3296     CodeGenFunction &CGF, const OMPLoopDirective &S,
3297     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3298   // Check if the loop directive is actually a doacross loop directive. In this
3299   // case choose static, 1 schedule.
3300   if (llvm::any_of(
3301           S.getClausesOfKind<OMPOrderedClause>(),
3302           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3303     ScheduleKind = OMPC_SCHEDULE_static;
3304     // Chunk size is 1 in this case.
3305     llvm::APInt ChunkSize(32, 1);
3306     ChunkExpr = IntegerLiteral::Create(
3307         CGF.getContext(), ChunkSize,
3308         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3309         SourceLocation());
3310   }
3311 }
3312 
/// Emit a barrier for directive \p Kind.  Inside a cancellable region (and
/// unless \p ForceSimpleCall) the cancellation-aware __kmpc_cancel_barrier
/// is used; when \p EmitChecks is set its result is tested so that a
/// cancelled team branches out of the construct.  Otherwise a plain
/// __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id); the directive kind determines the barrier flag in the
  // location.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3350 
/// Map the OpenMP loop schedule to the runtime enumeration.
/// \param Chunked true when an explicit chunk expression was specified.
/// \param Ordered true when the loop has an 'ordered' clause; ordered loops
/// map to the OMP_ord_* variants of the schedule.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    // Only 'static' distinguishes the chunked and non-chunked forms here.
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause was specified: default to non-chunked static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
3372 
3373 /// Map the OpenMP distribute schedule to the runtime enumeration.
3374 static OpenMPSchedType
3375 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3376   // only static is allowed for dist_schedule
3377   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3378 }
3379 
3380 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3381                                          bool Chunked) const {
3382   OpenMPSchedType Schedule =
3383       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3384   return Schedule == OMP_sch_static;
3385 }
3386 
3387 bool CGOpenMPRuntime::isStaticNonchunked(
3388     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3389   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3390   return Schedule == OMP_dist_sch_static;
3391 }
3392 
3393 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3394                                       bool Chunked) const {
3395   OpenMPSchedType Schedule =
3396       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3397   return Schedule == OMP_sch_static_chunked;
3398 }
3399 
3400 bool CGOpenMPRuntime::isStaticChunked(
3401     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3402   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3403   return Schedule == OMP_dist_sch_static_chunked;
3404 }
3405 
3406 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3407   OpenMPSchedType Schedule =
3408       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3409   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3410   return Schedule != OMP_sch_static;
3411 }
3412 
3413 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3414                                   OpenMPScheduleClauseModifier M1,
3415                                   OpenMPScheduleClauseModifier M2) {
3416   int Modifier = 0;
3417   switch (M1) {
3418   case OMPC_SCHEDULE_MODIFIER_monotonic:
3419     Modifier = OMP_sch_modifier_monotonic;
3420     break;
3421   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3422     Modifier = OMP_sch_modifier_nonmonotonic;
3423     break;
3424   case OMPC_SCHEDULE_MODIFIER_simd:
3425     if (Schedule == OMP_sch_static_chunked)
3426       Schedule = OMP_sch_static_balanced_chunked;
3427     break;
3428   case OMPC_SCHEDULE_MODIFIER_last:
3429   case OMPC_SCHEDULE_MODIFIER_unknown:
3430     break;
3431   }
3432   switch (M2) {
3433   case OMPC_SCHEDULE_MODIFIER_monotonic:
3434     Modifier = OMP_sch_modifier_monotonic;
3435     break;
3436   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3437     Modifier = OMP_sch_modifier_nonmonotonic;
3438     break;
3439   case OMPC_SCHEDULE_MODIFIER_simd:
3440     if (Schedule == OMP_sch_static_chunked)
3441       Schedule = OMP_sch_static_balanced_chunked;
3442     break;
3443   case OMPC_SCHEDULE_MODIFIER_last:
3444   case OMPC_SCHEDULE_MODIFIER_unknown:
3445     break;
3446   }
3447   return Schedule | Modifier;
3448 }
3449 
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through the for_static_init path instead, unless the
  // loop is ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3481 
/// Emit a call to the IV-size/signedness-specific __kmpc_for_static_init
/// runtime entry point. Only (dist_)static schedules, never ordered ones,
/// reach this helper.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // The chunk presence must be consistent with the schedule variant chosen
    // by getRuntimeSchedule.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3530 
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Encode whether this worksharing construct is a loop or a sections region
  // in the ident_t flags.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  // Select the __kmpc_for_static_init_(4|8)[u] variant matching the IV.
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
3550 
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // 'distribute' takes no schedule modifiers, so pass 'unknown' for both.
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
3566 
3567 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3568                                           SourceLocation Loc,
3569                                           OpenMPDirectiveKind DKind) {
3570   if (!CGF.HaveInsertPoint())
3571     return;
3572   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3573   llvm::Value *Args[] = {
3574       emitUpdateLocation(CGF, Loc,
3575                          isOpenMPDistributeDirective(DKind)
3576                              ? OMP_IDENT_WORK_DISTRIBUTE
3577                              : isOpenMPLoopDirective(DKind)
3578                                    ? OMP_IDENT_WORK_LOOP
3579                                    : OMP_IDENT_WORK_SECTIONS),
3580       getThreadID(CGF, Loc)};
3581   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3582                       Args);
3583 }
3584 
3585 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3586                                                  SourceLocation Loc,
3587                                                  unsigned IVSize,
3588                                                  bool IVSigned) {
3589   if (!CGF.HaveInsertPoint())
3590     return;
3591   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3592   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3593   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3594 }
3595 
/// Emit a call to __kmpc_dispatch_next and convert its kmp_int32 result to a
/// boolean value ("is there another chunk to execute?").
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a 32-bit int; narrow it to bool for the loop guard.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
3619 
3620 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3621                                            llvm::Value *NumThreads,
3622                                            SourceLocation Loc) {
3623   if (!CGF.HaveInsertPoint())
3624     return;
3625   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3626   llvm::Value *Args[] = {
3627       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3628       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3629   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3630                       Args);
3631 }
3632 
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  // Translate the clause kind to the runtime's numeric encoding; only
  // master/close/spread are emitted here.
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}
3667 
3668 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3669                                 SourceLocation Loc) {
3670   if (!CGF.HaveInsertPoint())
3671     return;
3672   // Build call void __kmpc_flush(ident_t *loc)
3673   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3674                       emitUpdateLocation(CGF, Loc));
3675 }
3676 
namespace {
/// Indexes of fields for type kmp_task_t. The enumerator order defines the
/// field indices used when accessing the kmp_task_t record.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3702 
3703 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3704   return OffloadEntriesTargetRegion.empty() &&
3705          OffloadEntriesDeviceGlobalVar.empty();
3706 }
3707 
/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) at the given
/// device/file/parent/line coordinates; the entry is filled in later by
/// registerTargetRegionEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3721 
/// Register a target region entry: on the device, fill in the address/ID of a
/// previously initialized entry; on the host, create the entry directly.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // A target region present on the host but missing on the device is a
      // hard error: host and device tables would get out of sync.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host compilation: create and record the entry in one step.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3749 
/// Return true when an entry exists at the given coordinates and has not yet
/// been registered (its address and ID are still null).
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  // Walk the nested device -> file -> parent-function -> line maps.
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}
3770 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  // The parent-name level is a StringMap, hence P.first() rather than
  // P.first.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}
3780 
/// Create a placeholder entry for a device global variable; the address,
/// size, and linkage are filled in later by registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3791 
/// Register a device global variable entry: on the device, complete the
/// entry that initializeDeviceGlobalVarEntryInfo created; on the host, create
/// the entry if it does not already exist.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // The entry must have been initialized already; operator[] would create
    // an invalid placeholder for an unknown name and trip the assert below.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address; only fill in size/linkage if they were
      // never set.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Host side may see the same variable twice; keep the first address and
      // only backfill a missing size/linkage.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3831 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
3839 
/// Create the offloading binary descriptor for this translation unit and the
/// functions that register/unregister it with the target runtime
/// (__tgt_register_lib / __tgt_unregister_lib). Returns the registration
/// function, or nullptr when compiling device code or when there are no
/// offload entries to describe.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    // Per-device image begin/end symbols; external-weak so a missing image
    // resolves to null. The device triple is appended to the symbol name.
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // First emit the unregistration function; it is passed to
  // registerGlobalDtor below so it runs at program shutdown.
  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort the triple parts so the comdat key is order-independent.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
3990 
3991 void CGOpenMPRuntime::createOffloadEntry(
3992     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3993     llvm::GlobalValue::LinkageTypes Linkage) {
3994   StringRef Name = Addr->getName();
3995   llvm::Module &M = CGM.getModule();
3996   llvm::LLVMContext &C = M.getContext();
3997 
3998   // Create constant string with the name.
3999   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4000 
4001   std::string StringName = getName({"omp_offloading", "entry_name"});
4002   auto *Str = new llvm::GlobalVariable(
4003       M, StrPtrInit->getType(), /*isConstant=*/true,
4004       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4005   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4006 
4007   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4008                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4009                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4010                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4011                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4012   std::string EntryName = getName({"omp_offloading", "entry", ""});
4013   llvm::GlobalVariable *Entry = createGlobalStruct(
4014       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4015       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4016 
4017   // The entry has to be created in the section the linker expects it to be.
4018   std::string Section = getName({"omp_offloading", "entries"});
4019   Entry->setSection(Section);
4020 }
4021 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries must be processed in creation order; both arrays are indexed by
  // OffloadEntryInfo::getOrder() and filled by the emitter lambdas below.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Second pass: walk the entries in creation order, diagnose invalid ones
  // and emit a __tgt_offload_entry for each valid entry.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      // Target region entries carry no payload size (functions have Size 0).
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // Link entries have an address only on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the entry's ID and address coincide.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4176 
4177 /// Loads all the offload entries information from the host IR
4178 /// metadata.
4179 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4180   // If we are in target mode, load the metadata from the host IR. This code has
4181   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4182 
4183   if (!CGM.getLangOpts().OpenMPIsDevice)
4184     return;
4185 
4186   if (CGM.getLangOpts().OMPHostIRFile.empty())
4187     return;
4188 
4189   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4190   if (auto EC = Buf.getError()) {
4191     CGM.getDiags().Report(diag::err_cannot_open_file)
4192         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4193     return;
4194   }
4195 
4196   llvm::LLVMContext C;
4197   auto ME = expectedToErrorOrAndEmitErrors(
4198       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4199 
4200   if (auto EC = ME.getError()) {
4201     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4202         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4203     CGM.getDiags().Report(DiagID)
4204         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4205     return;
4206   }
4207 
4208   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4209   if (!MD)
4210     return;
4211 
4212   for (llvm::MDNode *MN : MD->operands()) {
4213     auto &&GetMDInt = [MN](unsigned Idx) {
4214       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4215       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4216     };
4217 
4218     auto &&GetMDString = [MN](unsigned Idx) {
4219       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4220       return V->getString();
4221     };
4222 
4223     switch (GetMDInt(0)) {
4224     default:
4225       llvm_unreachable("Unexpected metadata!");
4226       break;
4227     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4228         OffloadingEntryInfoTargetRegion:
4229       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4230           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4231           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4232           /*Order=*/GetMDInt(5));
4233       break;
4234     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4235         OffloadingEntryInfoDeviceGlobalVar:
4236       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4237           /*MangledName=*/GetMDString(1),
4238           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4239               /*Flags=*/GetMDInt(2)),
4240           /*Order=*/GetMDInt(3));
4241       break;
4242     }
4243   }
4244 }
4245 
4246 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4247   if (!KmpRoutineEntryPtrTy) {
4248     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4249     ASTContext &C = CGM.getContext();
4250     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4251     FunctionProtoType::ExtProtoInfo EPI;
4252     KmpRoutineEntryPtrQTy = C.getPointerType(
4253         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4254     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4255   }
4256 }
4257 
4258 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4259   // Make sure the type of the entry is already created. This is the type we
4260   // have to create:
4261   // struct __tgt_offload_entry{
4262   //   void      *addr;       // Pointer to the offload entry info.
4263   //                          // (function or global)
4264   //   char      *name;       // Name of the function or global.
4265   //   size_t     size;       // Size of the entry info (0 if it a function).
4266   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4267   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4268   // };
4269   if (TgtOffloadEntryQTy.isNull()) {
4270     ASTContext &C = CGM.getContext();
4271     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4272     RD->startDefinition();
4273     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4274     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4275     addFieldToRecordDecl(C, RD, C.getSizeType());
4276     addFieldToRecordDecl(
4277         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4278     addFieldToRecordDecl(
4279         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4280     RD->completeDefinition();
4281     RD->addAttr(PackedAttr::CreateImplicit(C));
4282     TgtOffloadEntryQTy = C.getRecordType(RD);
4283   }
4284   return TgtOffloadEntryQTy;
4285 }
4286 
4287 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4288   // These are the types we need to build:
4289   // struct __tgt_device_image{
4290   // void   *ImageStart;       // Pointer to the target code start.
4291   // void   *ImageEnd;         // Pointer to the target code end.
4292   // // We also add the host entries to the device image, as it may be useful
4293   // // for the target runtime to have access to that information.
4294   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4295   //                                       // the entries.
4296   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4297   //                                       // entries (non inclusive).
4298   // };
4299   if (TgtDeviceImageQTy.isNull()) {
4300     ASTContext &C = CGM.getContext();
4301     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4302     RD->startDefinition();
4303     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4304     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4305     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4306     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4307     RD->completeDefinition();
4308     TgtDeviceImageQTy = C.getRecordType(RD);
4309   }
4310   return TgtDeviceImageQTy;
4311 }
4312 
4313 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4314   // struct __tgt_bin_desc{
4315   //   int32_t              NumDevices;      // Number of devices supported.
4316   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4317   //                                         // (one per device).
4318   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4319   //                                         // entries.
4320   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4321   //                                         // entries (non inclusive).
4322   // };
4323   if (TgtBinaryDescriptorQTy.isNull()) {
4324     ASTContext &C = CGM.getContext();
4325     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4326     RD->startDefinition();
4327     addFieldToRecordDecl(
4328         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4329     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4330     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4331     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4332     RD->completeDefinition();
4333     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4334   }
4335   return TgtBinaryDescriptorQTy;
4336 }
4337 
4338 namespace {
4339 struct PrivateHelpersTy {
4340   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4341                    const VarDecl *PrivateElemInit)
4342       : Original(Original), PrivateCopy(PrivateCopy),
4343         PrivateElemInit(PrivateElemInit) {}
4344   const VarDecl *Original;
4345   const VarDecl *PrivateCopy;
4346   const VarDecl *PrivateElemInit;
4347 };
4348 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4349 } // anonymous namespace
4350 
4351 static RecordDecl *
4352 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4353   if (!Privates.empty()) {
4354     ASTContext &C = CGM.getContext();
4355     // Build struct .kmp_privates_t. {
4356     //         /*  private vars  */
4357     //       };
4358     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4359     RD->startDefinition();
4360     for (const auto &Pair : Privates) {
4361       const VarDecl *VD = Pair.second.Original;
4362       QualType Type = VD->getType().getNonReferenceType();
4363       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4364       if (VD->hasAttrs()) {
4365         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4366              E(VD->getAttrs().end());
4367              I != E; ++I)
4368           FD->addAttr(*I);
4369       }
4370     }
4371     RD->completeDefinition();
4372     return RD;
4373   }
4374   return nullptr;
4375 }
4376 
4377 static RecordDecl *
4378 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4379                          QualType KmpInt32Ty,
4380                          QualType KmpRoutineEntryPointerQTy) {
4381   ASTContext &C = CGM.getContext();
4382   // Build struct kmp_task_t {
4383   //         void *              shareds;
4384   //         kmp_routine_entry_t routine;
4385   //         kmp_int32           part_id;
4386   //         kmp_cmplrdata_t data1;
4387   //         kmp_cmplrdata_t data2;
4388   // For taskloops additional fields:
4389   //         kmp_uint64          lb;
4390   //         kmp_uint64          ub;
4391   //         kmp_int64           st;
4392   //         kmp_int32           liter;
4393   //         void *              reductions;
4394   //       };
4395   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4396   UD->startDefinition();
4397   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4398   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4399   UD->completeDefinition();
4400   QualType KmpCmplrdataTy = C.getRecordType(UD);
4401   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4402   RD->startDefinition();
4403   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4404   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4405   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4406   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4407   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4408   if (isOpenMPTaskLoopDirective(Kind)) {
4409     QualType KmpUInt64Ty =
4410         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4411     QualType KmpInt64Ty =
4412         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4413     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4414     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4415     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4416     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4417     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4418   }
4419   RD->completeDefinition();
4420   return RD;
4421 }
4422 
4423 static RecordDecl *
4424 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4425                                      ArrayRef<PrivateDataTy> Privates) {
4426   ASTContext &C = CGM.getContext();
4427   // Build struct kmp_task_t_with_privates {
4428   //         kmp_task_t task_data;
4429   //         .kmp_privates_t. privates;
4430   //       };
4431   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4432   RD->startDefinition();
4433   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4434   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4435     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4436   RD->completeDefinition();
4437   return RD;
4438 }
4439 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Formal parameters of the proxy: global thread id and the task descriptor.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the kmp_task_t_with_privates* argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base addresses the embedded kmp_task_t (the wrapper's first field).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed to the task function by address.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the pointer type the task
  // function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates record, or null when the wrapper has no
  // privates field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entry points additionally receive the loop bounds, stride,
    // last-iteration flag and reductions pointer loaded from the descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the proxy to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4554 
4555 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4556                                             SourceLocation Loc,
4557                                             QualType KmpInt32Ty,
4558                                             QualType KmpTaskTWithPrivatesPtrQTy,
4559                                             QualType KmpTaskTWithPrivatesQTy) {
4560   ASTContext &C = CGM.getContext();
4561   FunctionArgList Args;
4562   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4563                             ImplicitParamDecl::Other);
4564   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4565                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4566                                 ImplicitParamDecl::Other);
4567   Args.push_back(&GtidArg);
4568   Args.push_back(&TaskTypeArg);
4569   const auto &DestructorFnInfo =
4570       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4571   llvm::FunctionType *DestructorFnTy =
4572       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4573   std::string Name =
4574       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4575   auto *DestructorFn =
4576       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4577                              Name, &CGM.getModule());
4578   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4579                                     DestructorFnInfo);
4580   DestructorFn->setDoesNotRecurse();
4581   CodeGenFunction CGF(CGM);
4582   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4583                     Args, Loc, Loc);
4584 
4585   LValue Base = CGF.EmitLoadOfPointerLValue(
4586       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4587       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4588   const auto *KmpTaskTWithPrivatesQTyRD =
4589       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4590   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4591   Base = CGF.EmitLValueForField(Base, *FI);
4592   for (const auto *Field :
4593        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4594     if (QualType::DestructionKind DtorKind =
4595             Field->getType().isDestructedType()) {
4596       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4597       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4598     }
4599   }
4600   CGF.FinishFunction();
4601   return DestructorFn;
4602 }
4603 
4604 /// Emit a privates mapping function for correct handling of private and
4605 /// firstprivate variables.
4606 /// \code
4607 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4608 /// **noalias priv1,...,  <tyn> **noalias privn) {
4609 ///   *priv1 = &.privates.priv1;
4610 ///   ...;
4611 ///   *privn = &.privates.privn;
4612 /// }
4613 /// \endcode
4614 static llvm::Value *
4615 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4616                                ArrayRef<const Expr *> PrivateVars,
4617                                ArrayRef<const Expr *> FirstprivateVars,
4618                                ArrayRef<const Expr *> LastprivateVars,
4619                                QualType PrivatesQTy,
4620                                ArrayRef<PrivateDataTy> Privates) {
4621   ASTContext &C = CGM.getContext();
4622   FunctionArgList Args;
4623   ImplicitParamDecl TaskPrivatesArg(
4624       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4625       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4626       ImplicitParamDecl::Other);
4627   Args.push_back(&TaskPrivatesArg);
4628   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4629   unsigned Counter = 1;
4630   for (const Expr *E : PrivateVars) {
4631     Args.push_back(ImplicitParamDecl::Create(
4632         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4633         C.getPointerType(C.getPointerType(E->getType()))
4634             .withConst()
4635             .withRestrict(),
4636         ImplicitParamDecl::Other));
4637     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4638     PrivateVarsPos[VD] = Counter;
4639     ++Counter;
4640   }
4641   for (const Expr *E : FirstprivateVars) {
4642     Args.push_back(ImplicitParamDecl::Create(
4643         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4644         C.getPointerType(C.getPointerType(E->getType()))
4645             .withConst()
4646             .withRestrict(),
4647         ImplicitParamDecl::Other));
4648     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4649     PrivateVarsPos[VD] = Counter;
4650     ++Counter;
4651   }
4652   for (const Expr *E : LastprivateVars) {
4653     Args.push_back(ImplicitParamDecl::Create(
4654         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4655         C.getPointerType(C.getPointerType(E->getType()))
4656             .withConst()
4657             .withRestrict(),
4658         ImplicitParamDecl::Other));
4659     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4660     PrivateVarsPos[VD] = Counter;
4661     ++Counter;
4662   }
4663   const auto &TaskPrivatesMapFnInfo =
4664       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4665   llvm::FunctionType *TaskPrivatesMapTy =
4666       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4667   std::string Name =
4668       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4669   auto *TaskPrivatesMap = llvm::Function::Create(
4670       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4671       &CGM.getModule());
4672   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4673                                     TaskPrivatesMapFnInfo);
4674   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4675   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4676   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4677   CodeGenFunction CGF(CGM);
4678   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4679                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4680 
4681   // *privi = &.privates.privi;
4682   LValue Base = CGF.EmitLoadOfPointerLValue(
4683       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4684       TaskPrivatesArg.getType()->castAs<PointerType>());
4685   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4686   Counter = 0;
4687   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4688     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4689     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4690     LValue RefLVal =
4691         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4692     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4693         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4694     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4695     ++Counter;
4696   }
4697   CGF.FinishFunction();
4698   return TaskPrivatesMap;
4699 }
4700 
/// Emit initialization for private variables in task-based directives.
/// Walks \p Privates in parallel with the fields of the task's implicit
/// `.privates.` record and emits an initializer for each private copy that
/// has one.
/// \param KmpTaskSharedsPtr Pointer to the task's shareds block, used as the
///        source for firstprivate initialization (may be invalid when there
///        is nothing to read from it).
/// \param TDBase LValue of the kmp_task_t-with-privates record being filled.
/// \param ForDup true when called from the task duplication function
///        (taskloop); then only non-trivial constructor calls are re-emitted,
///        since trivially-copyable data was already copied bitwise.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The second field of kmp_task_t_with_privates is the .privates. record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Step into the .privates. record; its fields correspond 1:1 to Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor calls are re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Rebuild the LValue with the alignment of the original decl so
          // later loads/stores use the correct alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: privatize the source element so the init
          // expression reads from the shared original, then run it.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Private/lastprivate: emit the copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4805 
4806 /// Check if duplication function is required for taskloops.
4807 static bool checkInitIsRequired(CodeGenFunction &CGF,
4808                                 ArrayRef<PrivateDataTy> Privates) {
4809   bool InitRequired = false;
4810   for (const PrivateDataTy &Pair : Privates) {
4811     const VarDecl *VD = Pair.second.PrivateCopy;
4812     const Expr *Init = VD->getAnyInitializer();
4813     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4814                                     !CGF.isTrivialInitializer(Init));
4815     if (InitRequired)
4816       break;
4817   }
4818   return InitRequired;
4819 }
4820 
4821 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true when the last-iteration flag must be copied into
/// the destination task (the directive has lastprivate clauses).
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are read from the source task's shareds pointer.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4900 
4901 /// Checks if destructor function is required to be generated.
4902 /// \return true if cleanups are required, false otherwise.
4903 static bool
4904 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4905   bool NeedsCleanup = false;
4906   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4907   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4908   for (const FieldDecl *FD : PrivateRD->fields()) {
4909     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4910     if (NeedsCleanup)
4911       break;
4912   }
4913   return NeedsCleanup;
4914 }
4915 
// Builds all the compiler-generated pieces a task directive needs — the
// kmp_task_t-with-privates record, the proxy task entry, the privates
// mapping function, and (for taskloops) the dup function — then emits the
// __kmpc_omp_task_alloc call and fills the allocated record (shareds copy,
// private copies init, destructor pointer, priority).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the element-init variable used to read
  // the shared original during initialization.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment to minimize padding in the privates record;
  // stable so equal-alignment entries keep their clause order.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloops use an extended
  // record, cached separately from the plain task record.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function is the 4th parameter of the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final clause may be a runtime expression (select) or a compile-time
  // constant (the PointerIntPair's int part).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may need a dup function to re-init privates per chunk.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5118 
// Emits the call sequence for a 'task' directive: allocates and initializes
// the task via emitTaskInit, builds the dependence array (if any), and emits
// either __kmpc_omp_task(_with_deps) or, under a false 'if' clause, the
// serialized begin_if0/entry/complete_if0 sequence.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build (and cache) the kmp_depend_info record on first use.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: size = &section[last] + 1 - &section[first],
        // computed as a pointer difference in bytes.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointing at its first element for the RTL.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): hand the task to the runtime for deferred
  // execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // Untied tasks start at part id 0.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause false: execute the task immediately (serialized), still
  // honoring its dependences.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5313 
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Nothing to emit if the current block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate and initialize the kmp_task_t object for this taskloop.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: result of the 'if' clause condition, or constant 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the loop lower bound into the lb field of the task descriptor.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop upper bound into the ub field of the task descriptor.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop stride into the st field of the task descriptor.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: zero the field so the runtime sees a null pointer.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling kinds accepted by __kmpc_taskloop's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: NumTasks/Grainsize when a schedule value is present, else 0.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize: the schedule value widened to kmp_uint64, or 0.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup: duplication routine for lastprivates, or null.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5394 
5395 /// Emit reduction operation for each element of array (required for
5396 /// array sections) LHS op = RHS.
5397 /// \param Type Type of array.
5398 /// \param LHSVar Variable on the left side of the reduction operation
5399 /// (references element of array in original variable).
5400 /// \param RHSVar Variable on the right side of the reduction operation
5401 /// (references element of array in original variable).
5402 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5403 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  // emitArrayLength also peels the array type, setting ElementTy as a side
  // effect and adjusting LHSAddr to point at the first element.
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations;
  // the back-edge incoming values are added after the body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily re-map LHSVar/RHSVar to the current elements so RedOpGen's
  // generated expressions operate on them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5474 
5475 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5476 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5477 /// UDR combiner function.
5478 static void emitReductionCombiner(CodeGenFunction &CGF,
5479                                   const Expr *ReductionOp) {
5480   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5481     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5482       if (const auto *DRE =
5483               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5484         if (const auto *DRD =
5485                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5486           std::pair<llvm::Function *, llvm::Function *> Reduction =
5487               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5488           RValue Func = RValue::get(Reduction.first);
5489           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5490           CGF.EmitIgnoredExpr(ReductionOp);
5491           return;
5492         }
5493   CGF.EmitIgnoredExpr(ReductionOp);
5494 }
5495 
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the function is only referenced by the runtime calls
  // emitted in this module.
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Re-map each LHS/RHS variable to the corresponding slot of the argument
  // arrays. Idx tracks the array slot, which can run ahead of I because VLA
  // reduction items occupy an extra slot holding the array size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA size expression to the value stored in the extra slot.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit one combiner per reduction item, now that all variables are re-mapped.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5587 
5588 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5589                                                   const Expr *ReductionOp,
5590                                                   const Expr *PrivateRef,
5591                                                   const DeclRefExpr *LHS,
5592                                                   const DeclRefExpr *RHS) {
5593   if (PrivateRef->getType()->isArrayType()) {
5594     // Emit reduction for array section.
5595     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5596     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5597     EmitOMPAggregateReduction(
5598         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5599         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5600           emitReductionCombiner(CGF, ReductionOp);
5601         });
5602   } else {
5603     // Emit reduction for array subscript or single variable.
5604     emitReductionCombiner(CGF, ReductionOp);
5605   }
5606 }
5607 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit if the current block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Serial case: apply each combiner directly, no runtime calls needed.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  // Fill the list with the addresses of the private (RHS) copies; VLA items
  // additionally store their element count in the following slot.
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // Smuggle the size through the void* slot as an inttoptr value.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Combiners shaped as 'x = <update expr>' may be emittable as a simple
      // atomic update; anything else falls back to a critical region.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // If a cmpxchg loop is needed, the update expression is
                // re-evaluated with VD mapped to a temp holding the old value.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5911 
5912 /// Generates unique name for artificial threadprivate variables.
5913 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5914 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5915                                       const Expr *Ref) {
5916   SmallString<256> Buffer;
5917   llvm::raw_svector_ostream Out(Buffer);
5918   const clang::DeclRefExpr *DE;
5919   const VarDecl *D = ::getBaseDecl(Ref, DE);
5920   if (!D)
5921     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5922   D = D->getCanonicalDecl();
5923   std::string Name = CGM.getOpenMPRuntime().getName(
5924       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5925   Out << Prefix << Name << "_"
5926       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5927   return Out.str();
5928 }
5929 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // void .red_init(void *arg) — arg points at the private copy to initialize.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    // The original item's address was stashed in a threadprivate variable by
    // the task-reduction setup; load it back for use as 'omp_orig'.
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No UDR initializer: a null shared address suffices.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5996 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Two void* parameters: %arg0 is the in/out (lhs) item, %arg1 the in (rhs)
  // item.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6074 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr if the reduction item requires no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor needed - no finalizer function at all.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single void* parameter: pointer to the private copy to be destroyed.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
6123 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do if there are no reduction variables at all.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one kmp_task_red_input_t array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; null if no cleanups are required.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; flag 1 marks items that need delayed creation.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6228 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the type is non-constant
  // (Sizes.second != nullptr). The initializer/combiner/finalizer functions
  // read the size back from this variable.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6255 
6256 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6257                                               SourceLocation Loc,
6258                                               llvm::Value *ReductionsPtr,
6259                                               LValue SharedLVal) {
6260   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6261   // *d);
6262   llvm::Value *Args[] = {
6263       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6264                                 /*isSigned=*/true),
6265       ReductionsPtr,
6266       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6267                                                       CGM.VoidPtrTy)};
6268   return Address(
6269       CGF.EmitRuntimeCall(
6270           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6271       SharedLVal.getAlignment());
6272 }
6273 
6274 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6275                                        SourceLocation Loc) {
6276   if (!CGF.HaveInsertPoint())
6277     return;
6278   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6279   // global_tid);
6280   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6281   // Ignore return result until untied tasks are supported.
6282   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6283   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6284     Region->emitUntiedSwitch(CGF);
6285 }
6286 
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // The RAII object installs the inlined-region CapturedStmtInfo; it must stay
  // alive while the body is emitted on the next line.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6296 
namespace {
/// Cancellation kind passed to __kmpc_cancel/__kmpc_cancellationpoint as the
/// cncl_kind argument. The explicit numeric values are presumably fixed by the
/// OpenMP runtime ABI — do not renumber.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6306 
6307 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6308   RTCancelKind CancelKind = CancelNoreq;
6309   if (CancelRegion == OMPD_parallel)
6310     CancelKind = CancelParallel;
6311   else if (CancelRegion == OMPD_for)
6312     CancelKind = CancelLoop;
6313   else if (CancelRegion == OMPD_sections)
6314     CancelKind = CancelSections;
6315   else {
6316     assert(CancelRegion == OMPD_taskgroup);
6317     CancelKind = CancelTaskgroup;
6318   }
6319   return CancelKind;
6320 }
6321 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branch through cleanups to the cancel
      // destination of the enclosing directive.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6356 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Emission of the actual cancel call, shared by the 'if'-guarded and the
    // unconditional paths below.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation was activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branch through cleanups to the cancel
      // destination of the enclosing directive.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: cancel only when the condition is true.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6398 
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Delegate all the work to the common helper defined below.
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6407 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the target region body into a function with the name computed
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host: a unique one-byte constant serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6474 
6475 /// Checks if the expression is constant or does not have non-trivial function
6476 /// calls.
6477 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6478   // We can skip constant expressions.
6479   // We can skip expressions with trivial calls or simple expressions.
6480   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6481           !E->hasNonTrivialCall(Ctx)) &&
6482          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6483 }
6484 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements, each time looking for exactly
  // one "meaningful" child; trivial expressions, ignorable directives and
  // trivial declarations do not count.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations with no runtime effect are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when trivially typed (or references)
              // and either uninitialized or trivially initialized.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers again so a nested compound statement is re-examined on
    // the next iteration.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6529 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Standalone 'target': look for a teams directive nested directly inside.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the num_teams expression in the captured context.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*IsSigned=*/true);
        }
        // Teams directive without num_teams: let the runtime decide.
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directive: num_teams, if any, is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and are rejected by
  // the assertion above; listed explicitly to keep the switch fully covered.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6650 
/// Inspect the (single) child of a target region's captured statement for a
/// nested 'parallel' (or 'simd') directive and compute the number of threads
/// it requires.
///
/// For a nested parallel directive the result is
///   <if-cond> ? (<num_threads> ? min(num_threads, thread_limit) : 0) : 1
/// where 0 means "let the runtime pick".
///
/// \param CS Captured statement of the enclosing target-based directive.
/// \param DefaultThreadLimitVal Thread limit derived from an enclosing
///        thread_limit clause, or null if none applies.
/// \return An i32 thread count; i32 1 for a nested simd region; otherwise
///         \p DefaultThreadLimitVal (possibly null) when no nested
///         parallel/simd directive is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  // Only a single compound-wrapped child is considered; anything more complex
  // falls through to the defaults at the bottom.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        // Expressions in the nested directive reference captures of the outer
        // captured statement; set up the capture mapping while emitting them.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to 'parallel': either an
        // unmodified one or one with the 'parallel' name modifier.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Constant-false condition: the parallel region is serialized,
            // so exactly one thread is needed.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any helper declarations the condition expression depends
            // on (pre-init statement of the clause).
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // No-init captures: allocate storage and register cleanups
                  // without running an initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init handling as for the if clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*IsSigned=*/false);
        // Clamp num_threads by the enclosing thread_limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread limit, or 0
        // ("runtime default") if there is none.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6742 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Must only be called on the host (the clause expressions reference host
/// values); the result is an i32 where 0 means "use the runtime default".
///
/// \param D A target-based executable directive.
/// \return i32 thread count for the target region, or nullptr when the
///         directive carries no thread information.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look through the captured statement for nested
    // teams/distribute/parallel constructs that carry thread information.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested directive may carry its own thread_limit clause; emit it
      // in the context of the outer captured statement.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit pre-init declarations the clause expression depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
      }
      // Step into a nested teams region (that is not itself a distribute)
      // and continue the search with its single child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A nested non-simd distribute: look inside it for a parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region executes with one thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit is on the combined directive itself here.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // Look through a nested plain 'distribute' for a parallel region.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the if clause that applies to 'parallel' (unmodified or with
      // the 'parallel' name modifier).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false: the parallel region is serialized.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    // Combine num_threads and thread_limit as min(num_threads, thread_limit)
    // (unsigned comparison).
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining kinds are not target-based execution directives and are
  // excluded by the assertion above; listed so the switch stays exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6955 
6956 namespace {
6957 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6958 
6959 // Utility to handle information from clauses associated with a given
6960 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6961 // It provides a convenient interface to obtain the information and generate
6962 // code for that information.
6963 class MappableExprsHandler {
6964 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these bit values presumably must stay in sync with the
  /// corresponding flags consumed by the offloading runtime — confirm against
  /// the runtime's header before changing any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The mask selects those 16 bits, which encode the
    /// (1-based) position of the parent struct in the argument list.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7002 
7003   /// Class that associates information with a base pointer to be passed to the
7004   /// runtime library.
7005   class BasePointerInfo {
7006     /// The base pointer.
7007     llvm::Value *Ptr = nullptr;
7008     /// The base declaration that refers to this device pointer, or null if
7009     /// there is none.
7010     const ValueDecl *DevPtrDecl = nullptr;
7011 
7012   public:
7013     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7014         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7015     llvm::Value *operator*() const { return Ptr; }
7016     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7017     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7018   };
7019 
7020   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7021   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7022   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7023 
  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Lowest mapped field: (field index, address of that field).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped field: (field index, address of that field).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the whole struct the fields belong to.
    Address Base = Address::invalid();
  };
7035 
7036 private:
  /// Information gathered from a map-like clause for a single mappable
  /// expression: its component list, map type and modifiers, whether the
  /// device pointer must be returned for it, and whether the map is implicit.
  struct MapInfo {
    // Components of the mappable expression (base and member accesses).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type from the clause (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Modifiers attached to the map clause (e.g. 'always').
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime must return the device pointer for this entry
    // (use_device_ptr handling).
    bool ReturnDevicePointer = false;
    // True if the map was generated implicitly rather than written by the
    // user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7054 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Innermost expression of the component list for the deferred entry.
    const Expr *IE = nullptr;
    // Declaration the use_device_ptr clause refers to.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7065 
7066   /// Directive from where the map clauses were extracted.
7067   const OMPExecutableDirective &CurDir;
7068 
7069   /// Function the directive is being generated for.
7070   CodeGenFunction &CGF;
7071 
7072   /// Set of all first private variables in the current directive.
7073   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
7074 
7075   /// Map between device pointer declarations and their expression components.
7076   /// The key value for declarations in 'this' is null.
7077   llvm::DenseMap<
7078       const ValueDecl *,
7079       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7080       DevPointersMap;
7081 
7082   llvm::Value *getExprTypeSize(const Expr *E) const {
7083     QualType ExprTy = E->getType().getCanonicalType();
7084 
7085     // Reference types are ignored for mapping purposes.
7086     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7087       ExprTy = RefTy->getPointeeType().getCanonicalType();
7088 
7089     // Given that an array section is considered a built-in type, we need to
7090     // do the calculation based on the length of the section instead of relying
7091     // on CGF.getTypeSize(E->getType()).
7092     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7093       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7094                             OAE->getBase()->IgnoreParenImpCasts())
7095                             .getCanonicalType();
7096 
7097       // If there is no length associated with the expression, that means we
7098       // are using the whole length of the base.
7099       if (!OAE->getLength() && OAE->getColonLoc().isValid())
7100         return CGF.getTypeSize(BaseTy);
7101 
7102       llvm::Value *ElemSize;
7103       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7104         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7105       } else {
7106         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7107         assert(ATy && "Expecting array type if not a pointer type.");
7108         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7109       }
7110 
7111       // If we don't have a length at this point, that is because we have an
7112       // array section with a single element.
7113       if (!OAE->getLength())
7114         return ElemSize;
7115 
7116       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7117       LengthVal =
7118           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7119       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7120     }
7121     return CGF.getTypeSize(ExprTy);
7122   }
7123 
7124   /// Return the corresponding bits for a given map clause modifier. Add
7125   /// a flag marking the map as a pointer if requested. Add a flag marking the
7126   /// map as the first one of a series of maps that relate to the same map
7127   /// expression.
7128   OpenMPOffloadMappingFlags getMapTypeBits(
7129       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7130       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7131     OpenMPOffloadMappingFlags Bits =
7132         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7133     switch (MapType) {
7134     case OMPC_MAP_alloc:
7135     case OMPC_MAP_release:
7136       // alloc and release is the default behavior in the runtime library,  i.e.
7137       // if we don't pass any bits alloc/release that is what the runtime is
7138       // going to do. Therefore, we don't need to signal anything for these two
7139       // type modifiers.
7140       break;
7141     case OMPC_MAP_to:
7142       Bits |= OMP_MAP_TO;
7143       break;
7144     case OMPC_MAP_from:
7145       Bits |= OMP_MAP_FROM;
7146       break;
7147     case OMPC_MAP_tofrom:
7148       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7149       break;
7150     case OMPC_MAP_delete:
7151       Bits |= OMP_MAP_DELETE;
7152       break;
7153     case OMPC_MAP_unknown:
7154       llvm_unreachable("Unexpected map type!");
7155     }
7156     if (AddPtrFlag)
7157       Bits |= OMP_MAP_PTR_AND_OBJ;
7158     if (AddIsTargetParamFlag)
7159       Bits |= OMP_MAP_TARGET_PARAM;
7160     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7161         != MapModifiers.end())
7162       Bits |= OMP_MAP_ALWAYS;
7163     return Bits;
7164   }
7165 
7166   /// Return true if the provided expression is a final array section. A
7167   /// final array section, is one whose length can't be proved to be one.
7168   bool isFinalArraySectionExpression(const Expr *E) const {
7169     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7170 
7171     // It is not an array section and therefore not a unity-size one.
7172     if (!OASE)
7173       return false;
7174 
7175     // An array section with no colon always refer to a single element.
7176     if (OASE->getColonLoc().isInvalid())
7177       return false;
7178 
7179     const Expr *Length = OASE->getLength();
7180 
7181     // If we don't have a length we have to check if the array has size 1
7182     // for this dimension. Also, we should always expect a length if the
7183     // base type is pointer.
7184     if (!Length) {
7185       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7186                              OASE->getBase()->IgnoreParenImpCasts())
7187                              .getCanonicalType();
7188       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7189         return ATy->getSize().getSExtValue() != 1;
7190       // If we don't have a constant dimension length, we have to consider
7191       // the current section as having any size, so it is not necessarily
7192       // unitary. If it happen to be unity size, that's user fault.
7193       return true;
7194     }
7195 
7196     // Check if the length evaluates to 1.
7197     Expr::EvalResult Result;
7198     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7199       return true; // Can have more that size 1.
7200 
7201     llvm::APSInt ConstLength = Result.Val.getInt();
7202     return ConstLength.getSExtValue() != 1;
7203   }
7204 
7205   /// Generate the base pointers, section pointers, sizes and map type
7206   /// bits for the provided map type, map modifier, and expression components.
7207   /// \a IsFirstComponent should be set to true if the provided set of
7208   /// components is the first associated with a capture.
7209   void generateInfoForComponentList(
7210       OpenMPMapClauseKind MapType,
7211       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7212       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7213       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7214       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7215       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7216       bool IsImplicit,
7217       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7218           OverlappedElements = llvm::None) const {
7219     // The following summarizes what has to be generated for each map and the
7220     // types below. The generated information is expressed in this order:
7221     // base pointer, section pointer, size, flags
7222     // (to add to the ones that come from the map type and modifier).
7223     //
7224     // double d;
7225     // int i[100];
7226     // float *p;
7227     //
7228     // struct S1 {
7229     //   int i;
7230     //   float f[50];
7231     // }
7232     // struct S2 {
7233     //   int i;
7234     //   float f[50];
7235     //   S1 s;
7236     //   double *p;
7237     //   struct S2 *ps;
7238     // }
7239     // S2 s;
7240     // S2 *ps;
7241     //
7242     // map(d)
7243     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7244     //
7245     // map(i)
7246     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7247     //
7248     // map(i[1:23])
7249     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7250     //
7251     // map(p)
7252     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7253     //
7254     // map(p[1:24])
7255     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7256     //
7257     // map(s)
7258     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7259     //
7260     // map(s.i)
7261     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7262     //
7263     // map(s.s.f)
7264     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7265     //
7266     // map(s.p)
7267     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7268     //
7269     // map(to: s.p[:22])
7270     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7271     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7272     // &(s.p), &(s.p[0]), 22*sizeof(double),
7273     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7274     // (*) alloc space for struct members, only this is a target parameter
7275     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7276     //      optimizes this entry out, same in the examples below)
7277     // (***) map the pointee (map: to)
7278     //
7279     // map(s.ps)
7280     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7281     //
7282     // map(from: s.ps->s.i)
7283     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7284     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7285     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7286     //
7287     // map(to: s.ps->ps)
7288     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7289     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7290     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7291     //
7292     // map(s.ps->ps->ps)
7293     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7294     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7295     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7296     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7297     //
7298     // map(to: s.ps->ps->s.f[:22])
7299     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7300     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7301     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7302     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7303     //
7304     // map(ps)
7305     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7306     //
7307     // map(ps->i)
7308     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7309     //
7310     // map(ps->s.f)
7311     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7312     //
7313     // map(from: ps->p)
7314     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7315     //
7316     // map(to: ps->p[:22])
7317     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7318     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7319     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7320     //
7321     // map(ps->ps)
7322     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7323     //
7324     // map(from: ps->ps->s.i)
7325     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7326     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7327     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7328     //
7329     // map(from: ps->ps->ps)
7330     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7331     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7332     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7333     //
7334     // map(ps->ps->ps->ps)
7335     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7336     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7337     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7338     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7339     //
7340     // map(to: ps->ps->ps->s.f[:22])
7341     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7342     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7343     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7344     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7345     //
7346     // map(to: s.f[:22]) map(from: s.p[:33])
7347     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7348     //     sizeof(double*) (**), TARGET_PARAM
7349     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7350     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7351     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7352     // (*) allocate contiguous space needed to fit all mapped members even if
7353     //     we allocate space for members not mapped (in this example,
7354     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7355     //     them as well because they fall between &s.f[0] and &s.p)
7356     //
7357     // map(from: s.f[:22]) map(to: ps->p[:33])
7358     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7359     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7360     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7361     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7362     // (*) the struct this entry pertains to is the 2nd element in the list of
7363     //     arguments, hence MEMBER_OF(2)
7364     //
7365     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7366     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7367     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7368     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7369     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7370     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7371     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7372     // (*) the struct this entry pertains to is the 4th element in the list
7373     //     of arguments, hence MEMBER_OF(4)
7374 
7375     // Track if the map information being generated is the first for a capture.
7376     bool IsCaptureFirstInfo = IsFirstComponentList;
7377     bool IsLink = false; // Is this variable a "declare target link"?
7378 
7379     // Scan the components from the base to the complete expression.
7380     auto CI = Components.rbegin();
7381     auto CE = Components.rend();
7382     auto I = CI;
7383 
7384     // Track if the map information being generated is the first for a list of
7385     // components.
7386     bool IsExpressionFirstInfo = true;
7387     Address BP = Address::invalid();
7388     const Expr *AssocExpr = I->getAssociatedExpression();
7389     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7390     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7391 
7392     if (isa<MemberExpr>(AssocExpr)) {
7393       // The base is the 'this' pointer. The content of the pointer is going
7394       // to be the base of the field being mapped.
7395       BP = CGF.LoadCXXThisAddress();
7396     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7397                (OASE &&
7398                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7399       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7400     } else {
7401       // The base is the reference to the variable.
7402       // BP = &Var.
7403       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7404       if (const auto *VD =
7405               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7406         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7407                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
7408           if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
7409             IsLink = true;
7410             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
7411           }
7412       }
7413 
7414       // If the variable is a pointer and is being dereferenced (i.e. is not
7415       // the last component), the base has to be the pointer itself, not its
7416       // reference. References are ignored for mapping purposes.
7417       QualType Ty =
7418           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7419       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7420         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7421 
7422         // We do not need to generate individual map information for the
7423         // pointer, it can be associated with the combined storage.
7424         ++I;
7425       }
7426     }
7427 
7428     // Track whether a component of the list should be marked as MEMBER_OF some
7429     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7430     // in a component list should be marked as MEMBER_OF, all subsequent entries
7431     // do not belong to the base struct. E.g.
7432     // struct S2 s;
7433     // s.ps->ps->ps->f[:]
7434     //   (1) (2) (3) (4)
7435     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7436     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7437     // is the pointee of ps(2) which is not member of struct s, so it should not
7438     // be marked as such (it is still PTR_AND_OBJ).
7439     // The variable is initialized to false so that PTR_AND_OBJ entries which
7440     // are not struct members are not considered (e.g. array of pointers to
7441     // data).
7442     bool ShouldBeMemberOf = false;
7443 
7444     // Variable keeping track of whether or not we have encountered a component
7445     // in the component list which is a member expression. Useful when we have a
7446     // pointer or a final array section, in which case it is the previous
7447     // component in the list which tells us whether we have a member expression.
7448     // E.g. X.f[:]
7449     // While processing the final array section "[:]" it is "f" which tells us
7450     // whether we are dealing with a member of a declared struct.
7451     const MemberExpr *EncounteredME = nullptr;
7452 
7453     for (; I != CE; ++I) {
7454       // If the current component is member of a struct (parent struct) mark it.
7455       if (!EncounteredME) {
7456         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7457         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7458         // as MEMBER_OF the parent struct.
7459         if (EncounteredME)
7460           ShouldBeMemberOf = true;
7461       }
7462 
7463       auto Next = std::next(I);
7464 
7465       // We need to generate the addresses and sizes if this is the last
7466       // component, if the component is a pointer or if it is an array section
7467       // whose length can't be proved to be one. If this is a pointer, it
7468       // becomes the base address for the following components.
7469 
7470       // A final array section, is one whose length can't be proved to be one.
7471       bool IsFinalArraySection =
7472           isFinalArraySectionExpression(I->getAssociatedExpression());
7473 
7474       // Get information on whether the element is a pointer. Have to do a
7475       // special treatment for array sections given that they are built-in
7476       // types.
7477       const auto *OASE =
7478           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7479       bool IsPointer =
7480           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7481                        .getCanonicalType()
7482                        ->isAnyPointerType()) ||
7483           I->getAssociatedExpression()->getType()->isAnyPointerType();
7484 
7485       if (Next == CE || IsPointer || IsFinalArraySection) {
7486         // If this is not the last component, we expect the pointer to be
7487         // associated with an array expression or member expression.
7488         assert((Next == CE ||
7489                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7490                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7491                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7492                "Unexpected expression");
7493 
7494         Address LB =
7495             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7496 
7497         // If this component is a pointer inside the base struct then we don't
7498         // need to create any entry for it - it will be combined with the object
7499         // it is pointing to into a single PTR_AND_OBJ entry.
7500         bool IsMemberPointer =
7501             IsPointer && EncounteredME &&
7502             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7503              EncounteredME);
7504         if (!OverlappedElements.empty()) {
7505           // Handle base element with the info for overlapped elements.
7506           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7507           assert(Next == CE &&
7508                  "Expected last element for the overlapped elements.");
7509           assert(!IsPointer &&
7510                  "Unexpected base element with the pointer type.");
7511           // Mark the whole struct as the struct that requires allocation on the
7512           // device.
7513           PartialStruct.LowestElem = {0, LB};
7514           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7515               I->getAssociatedExpression()->getType());
7516           Address HB = CGF.Builder.CreateConstGEP(
7517               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7518                                                               CGF.VoidPtrTy),
7519               TypeSize.getQuantity() - 1);
7520           PartialStruct.HighestElem = {
7521               std::numeric_limits<decltype(
7522                   PartialStruct.HighestElem.first)>::max(),
7523               HB};
7524           PartialStruct.Base = BP;
7525           // Emit data for non-overlapped data.
7526           OpenMPOffloadMappingFlags Flags =
7527               OMP_MAP_MEMBER_OF |
7528               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7529                              /*AddPtrFlag=*/false,
7530                              /*AddIsTargetParamFlag=*/false);
7531           LB = BP;
7532           llvm::Value *Size = nullptr;
7533           // Do bitcopy of all non-overlapped structure elements.
7534           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7535                    Component : OverlappedElements) {
7536             Address ComponentLB = Address::invalid();
7537             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7538                  Component) {
7539               if (MC.getAssociatedDeclaration()) {
7540                 ComponentLB =
7541                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7542                         .getAddress();
7543                 Size = CGF.Builder.CreatePtrDiff(
7544                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7545                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7546                 break;
7547               }
7548             }
7549             BasePointers.push_back(BP.getPointer());
7550             Pointers.push_back(LB.getPointer());
7551             Sizes.push_back(Size);
7552             Types.push_back(Flags);
7553             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7554           }
7555           BasePointers.push_back(BP.getPointer());
7556           Pointers.push_back(LB.getPointer());
7557           Size = CGF.Builder.CreatePtrDiff(
7558               CGF.EmitCastToVoidPtr(
7559                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7560               CGF.EmitCastToVoidPtr(LB.getPointer()));
7561           Sizes.push_back(Size);
7562           Types.push_back(Flags);
7563           break;
7564         }
7565         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7566         if (!IsMemberPointer) {
7567           BasePointers.push_back(BP.getPointer());
7568           Pointers.push_back(LB.getPointer());
7569           Sizes.push_back(Size);
7570 
7571           // We need to add a pointer flag for each map that comes from the
7572           // same expression except for the first one. We also need to signal
7573           // this map is the first one that relates with the current capture
7574           // (there is a set of entries for each capture).
7575           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7576               MapType, MapModifiers, IsImplicit,
7577               !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
7578 
7579           if (!IsExpressionFirstInfo) {
7580             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7581             // then we reset the TO/FROM/ALWAYS/DELETE flags.
7582             if (IsPointer)
7583               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7584                          OMP_MAP_DELETE);
7585 
7586             if (ShouldBeMemberOf) {
7587               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7588               // should be later updated with the correct value of MEMBER_OF.
7589               Flags |= OMP_MAP_MEMBER_OF;
7590               // From now on, all subsequent PTR_AND_OBJ entries should not be
7591               // marked as MEMBER_OF.
7592               ShouldBeMemberOf = false;
7593             }
7594           }
7595 
7596           Types.push_back(Flags);
7597         }
7598 
7599         // If we have encountered a member expression so far, keep track of the
7600         // mapped member. If the parent is "*this", then the value declaration
7601         // is nullptr.
7602         if (EncounteredME) {
7603           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7604           unsigned FieldIndex = FD->getFieldIndex();
7605 
7606           // Update info about the lowest and highest elements for this struct
7607           if (!PartialStruct.Base.isValid()) {
7608             PartialStruct.LowestElem = {FieldIndex, LB};
7609             PartialStruct.HighestElem = {FieldIndex, LB};
7610             PartialStruct.Base = BP;
7611           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7612             PartialStruct.LowestElem = {FieldIndex, LB};
7613           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7614             PartialStruct.HighestElem = {FieldIndex, LB};
7615           }
7616         }
7617 
7618         // If we have a final array section, we are done with this expression.
7619         if (IsFinalArraySection)
7620           break;
7621 
7622         // The pointer becomes the base for the next element.
7623         if (Next != CE)
7624           BP = LB;
7625 
7626         IsExpressionFirstInfo = false;
7627         IsCaptureFirstInfo = false;
7628       }
7629     }
7630   }
7631 
7632   /// Return the adjusted map modifiers if the declaration a capture refers to
7633   /// appears in a first-private clause. This is expected to be used only with
7634   /// directives that start with 'target'.
7635   MappableExprsHandler::OpenMPOffloadMappingFlags
7636   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7637     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7638 
7639     // A first private variable captured by reference will use only the
7640     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7641     // declaration is known as first-private in this handler.
7642     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7643       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7644           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7645         return MappableExprsHandler::OMP_MAP_ALWAYS |
7646                MappableExprsHandler::OMP_MAP_TO;
7647       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7648         return MappableExprsHandler::OMP_MAP_TO |
7649                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7650       return MappableExprsHandler::OMP_MAP_PRIVATE |
7651              MappableExprsHandler::OMP_MAP_TO;
7652     }
7653     return MappableExprsHandler::OMP_MAP_TO |
7654            MappableExprsHandler::OMP_MAP_FROM;
7655   }
7656 
7657   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7658     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7659     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7660                                                   << 48);
7661   }
7662 
7663   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7664                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7665     // If the entry is PTR_AND_OBJ but has not been marked with the special
7666     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7667     // marked as MEMBER_OF.
7668     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7669         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7670       return;
7671 
7672     // Reset the placeholder value to prepare the flag for the assignment of the
7673     // proper MEMBER_OF value.
7674     Flags &= ~OMP_MAP_MEMBER_OF;
7675     Flags |= MemberOfFlag;
7676   }
7677 
  /// Flatten the layout of record \p RD into \p Layout: recursively visit the
  /// non-empty (virtual and non-virtual) bases and append every non-bitfield
  /// field, in the order given by the LLVM struct layout.
  /// \param RD record whose fields are collected.
  /// \param Layout output vector of field declarations, in layout order.
  /// \param AsBase true when \p RD is laid out as a base subobject, in which
  /// case the (possibly smaller) base subobject LLVM type is used.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Scratch layout indexed by LLVM field number. Each occupied slot holds
    // either a base class (to recurse into) or a field declaration.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A slot may already be occupied by a non-virtual base recorded above;
      // keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField()) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected slots in layout order: recurse into base classes and
    // append plain fields.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7737 
7738 public:
7739   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7740       : CurDir(Dir), CGF(CGF) {
7741     // Extract firstprivate clause information.
7742     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7743       for (const auto *D : C->varlists())
7744         FirstPrivateDecls.insert(
7745             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7746     // Extract device pointer clause information.
7747     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7748       for (auto L : C->component_lists())
7749         DevPointersMap[L.first].push_back(L.second);
7750   }
7751 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members. The combined entry covers the contiguous range
  /// from the lowest to the highest mapped member; the existing per-member
  /// entries in \p CurTypes are rewritten to be MEMBER_OF this new entry.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct.
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM for the combined entry.
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element: the combined entry is
    // now the one passed as the kernel argument.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
7786 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      // Entries are keyed by the canonical declaration; a null key is used
      // for 'this'-based member expressions.
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect the component lists from map, to and from clauses. 'to'/'from'
    // clauses reuse the machinery with a fixed map type and no modifiers.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
        this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Now process the collected component lists, one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays; merged into the output arrays only
      // after a possible combined struct entry has been emitted.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
7966 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If the captured declaration \a VD is a lambda object, emit one map entry
  /// for the lambda's 'this' capture (if present) and one entry per variable
  /// the lambda captures by reference. Each entry is flagged
  /// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT; the MEMBER_OF index is a
  /// placeholder fixed up later by adjustMemberOfForLambdaCaptures().
  /// \param VD Captured declaration (only lambdas are handled; otherwise this
  ///        is a no-op).
  /// \param Arg Runtime value holding the address of the lambda object.
  /// \param LambdaPointers Filled with field-address -> lambda-address pairs
  ///        used afterwards to locate the parent lambda entry.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Nothing to do unless the captured value is actually a lambda object.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the lambda's capture fields and its 'this' capture, if any.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Remember which lambda this field belongs to for the later MEMBER_OF
      // fix-up.
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      // Only by-reference captures need a pointer attachment entry.
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8013 
8014   /// Set correct indices for lambdas captures.
8015   void adjustMemberOfForLambdaCaptures(
8016       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8017       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8018       MapFlagsArrayTy &Types) const {
8019     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8020       // Set correct member_of idx for all implicit lambda captures.
8021       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8022                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8023         continue;
8024       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8025       assert(BasePtr && "Unable to find base lambda address.");
8026       int TgtIdx = -1;
8027       for (unsigned J = I; J > 0; --J) {
8028         unsigned Idx = J - 1;
8029         if (Pointers[Idx] != BasePtr)
8030           continue;
8031         TgtIdx = Idx;
8032         break;
8033       }
8034       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8035       // All other current entries will be MEMBER_OF the combined entry
8036       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8037       // 0xFFFF in the MEMBER_OF field).
8038       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8039       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8040     }
8041   }
8042 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// Gathers every map-clause component list that references the captured
  /// declaration, detects overlapping lists between clauses so overlapped
  /// regions are emitted only once, and finally generates the map info for
  /// each list via generateInfoForComponentList().
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The 'this' capture has no associated declaration; other captures are
    // identified by their canonical declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect (components, map-type, modifiers, is-implicit) tuples from all
    // map clauses that mention this declaration.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L against every later list; both component lists are walked
      // from the base element (rbegin) towards the leaves until they diverge.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The fully-consumed list is the base; the other one is recorded as
          // an overlapped sub-component of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Obtain the record's field layout so overlapped component lists can be
      // ordered by field position.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of the two component lists.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Order by field index within the same record; otherwise by
            // whichever field appears first in the record layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8216 
8217   /// Generate the base pointers, section pointers, sizes and map types
8218   /// associated with the declare target link variables.
8219   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8220                                         MapValuesArrayTy &Pointers,
8221                                         MapValuesArrayTy &Sizes,
8222                                         MapFlagsArrayTy &Types) const {
8223     // Map other list items in the map clause which are not captured variables
8224     // but "declare target link" global variables.,
8225     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8226       for (const auto &L : C->component_lists()) {
8227         if (!L.first)
8228           continue;
8229         const auto *VD = dyn_cast<VarDecl>(L.first);
8230         if (!VD)
8231           continue;
8232         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8233             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8234         if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8235           continue;
8236         StructRangeInfoTy PartialStruct;
8237         generateInfoForComponentList(
8238             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8239             Pointers, Sizes, Types, PartialStruct,
8240             /*IsFirstComponentList=*/true, C->isImplicit());
8241         assert(!PartialStruct.Base.isValid() &&
8242                "No partial structs for declare target link expected.");
8243       }
8244     }
8245   }
8246 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used when no explicit map clause covers the capture. Exactly one entry
  /// is appended to each output array, and it is flagged as a target
  /// parameter and as an implicit map.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped with the size of the pointed-to class.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      // Constant firstprivate variables get a registered global copy whose
      // contents are memcpy'd from the original, and the global is used as
      // both base pointer and pointer.
      if (FirstPrivateDecls.count(VD) &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*isVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
          // For a firstprivate pointer, load its value so the pointer itself
          // (not the reference slot) is what gets mapped.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8317 };
8318 
/// Reserved device IDs understood by the offloading runtime library.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
8324 } // anonymous namespace
8325 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Allocates the .offload_baseptrs/.offload_ptrs temporaries (and either a
/// temporary or a constant global for the sizes), emits a constant global for
/// the map types, and stores each base pointer/pointer (and runtime-evaluated
/// size, if any) into its array slot.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill each array slot with its corresponding base pointer, pointer and
    // (if needed) runtime-evaluated size.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where the device-pointer declaration's slot lives so it can be
      // retrieved later (use_device_ptr support).
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8442 /// Emit the arguments to be passed to the runtime library based on the
8443 /// arrays of pointers, sizes and map types.
8444 static void emitOffloadingArraysArgument(
8445     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8446     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8447     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8448   CodeGenModule &CGM = CGF.CGM;
8449   if (Info.NumberOfPtrs) {
8450     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8451         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8452         Info.BasePointersArray,
8453         /*Idx0=*/0, /*Idx1=*/0);
8454     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8455         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8456         Info.PointersArray,
8457         /*Idx0=*/0,
8458         /*Idx1=*/0);
8459     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8460         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
8461         /*Idx0=*/0, /*Idx1=*/0);
8462     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8463         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8464         Info.MapTypesArray,
8465         /*Idx0=*/0,
8466         /*Idx1=*/0);
8467   } else {
8468     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8469     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8470     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
8471     MapTypesArrayArg =
8472         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8473   }
8474 }
8475 
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested inside the given target construct
/// \a D — either directly or below an intermediate 'teams' directive — or
/// nullptr if there is none.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may wrap 'distribute' directly, or 'teams' which in turn
      // wraps 'distribute'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // Combined target constructs without a teams region cannot contain a
      // nested distribute directive.
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      // All other directive kinds are never passed to this helper.
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8571 
/// Emit a __kmpc_push_target_tripcount call communicating the trip count of
/// the (possibly nested) distribute loop of target directive \a D to the
/// runtime, computed via \a SizeEmitter. Does nothing if no such loop exists.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
    const llvm::function_ref<llvm::Value *(
        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    llvm::Value *Args[] = {DeviceID, NumIterations};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
8602 
/// Emit the host-side launch code for a target directive: fill the offloading
/// argument arrays, invoke the device entry point (__tgt_target /
/// __tgt_target_teams, with _nowait variants), and fall back to calling the
/// host-outlined function when offloading fails or is not available.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause requires the offloading code to be emitted inside an
  // outer task; the captured variables then have to be re-generated in that
  // task's context (see the RequiresOuterTask uses below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and read
  // by ThenGen (captured by reference).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value means offloading failed; run the host fallback.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captures must be regenerated in the task's
      // own context before calling the host version.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays from the captures/map clauses, records the
  // results in InputInfo/MapTypesArray, and then runs ThenGen (wrapped in a
  // task when a 'depend' clause is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Iterate captures, record fields, and captured values in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host fallback, optionally wrapped in an outer task for 'depend' clauses.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8873 
/// Recursively scan the statement \p S for OpenMP target execution directives
/// and emit the corresponding device functions, using \p ParentName in the
/// unique kernel-entry mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Identify the target region by its device/file IDs and source line.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-codegen entry point for the specific combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All the remaining directive kinds are not target execution directives
    // and must not reach this switch (exhaustive listing keeps -Wswitch
    // useful when new kinds are added).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, only look inside its associated
  // (captured) statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9011 
9012 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9013   // If emitting code for the host, we do not process FD here. Instead we do
9014   // the normal code generation.
9015   if (!CGM.getLangOpts().OpenMPIsDevice)
9016     return false;
9017 
9018   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9019   StringRef Name = CGM.getMangledName(GD);
9020   // Try to detect target regions in the function.
9021   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9022     scanForTargetRegionsFunctions(FD->getBody(), Name);
9023 
9024   // Do not to emit function if it is not marked as declare target.
9025   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9026          AlreadyEmittedTargetFunctions.count(Name) == 0;
9027 }
9028 
9029 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9030   if (!CGM.getLangOpts().OpenMPIsDevice)
9031     return false;
9032 
9033   // Check if there are Ctors/Dtors in this declaration and look for target
9034   // regions in it. We use the complete variant to produce the kernel name
9035   // mangling.
9036   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9037   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9038     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9039       StringRef ParentName =
9040           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9041       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9042     }
9043     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9044       StringRef ParentName =
9045           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9046       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9047     }
9048   }
9049 
9050   // Do not to emit variable if it is not marked as declare target.
9051   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9052       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9053           cast<VarDecl>(GD.getDecl()));
9054   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
9055     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9056     return true;
9057   }
9058   return false;
9059 }
9060 
/// Create (or reuse) an internal constant global that holds the value of a
/// constant firstprivate variable of a target region, register it in the
/// offload entries table, and return its address.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a unique name from the device/file IDs and the source line:
    // __omp_offloading_firstprivate__<devid>_<fileid>_<name>_l<line>.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the global alive through optimizations (llvm.compiler.used).
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}
9094 
9095 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9096                                                    llvm::Constant *Addr) {
9097   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9098       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9099   if (!Res) {
9100     if (CGM.getLangOpts().OpenMPIsDevice) {
9101       // Register non-target variables being emitted in device code (debug info
9102       // may cause this).
9103       StringRef VarName = CGM.getMangledName(VD);
9104       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9105     }
9106     return;
9107   }
9108   // Register declare target variables.
9109   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9110   StringRef VarName;
9111   CharUnits VarSize;
9112   llvm::GlobalValue::LinkageTypes Linkage;
9113   switch (*Res) {
9114   case OMPDeclareTargetDeclAttr::MT_To:
9115     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9116     VarName = CGM.getMangledName(VD);
9117     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9118       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9119       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9120     } else {
9121       VarSize = CharUnits::Zero();
9122     }
9123     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9124     // Temp solution to prevent optimizations of the internal variables.
9125     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9126       std::string RefName = getName({VarName, "ref"});
9127       if (!CGM.GetGlobalValue(RefName)) {
9128         llvm::Constant *AddrRef =
9129             getOrCreateInternalVariable(Addr->getType(), RefName);
9130         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9131         GVAddrRef->setConstant(/*Val=*/true);
9132         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9133         GVAddrRef->setInitializer(Addr);
9134         CGM.addCompilerUsedGlobal(GVAddrRef);
9135       }
9136     }
9137     break;
9138   case OMPDeclareTargetDeclAttr::MT_Link:
9139     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9140     if (CGM.getLangOpts().OpenMPIsDevice) {
9141       VarName = Addr->getName();
9142       Addr = nullptr;
9143     } else {
9144       VarName = getAddrOfDeclareTargetLink(VD).getName();
9145       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
9146     }
9147     VarSize = CGM.getPointerSize();
9148     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9149     break;
9150   }
9151   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9152       VarName, Addr, VarSize, Flags, Linkage);
9153 }
9154 
9155 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9156   if (isa<FunctionDecl>(GD.getDecl()) ||
9157       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9158     return emitTargetFunctions(GD);
9159 
9160   return emitTargetGlobalVariable(GD);
9161 }
9162 
9163 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9164   for (const VarDecl *VD : DeferredGlobalVariables) {
9165     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9166         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9167     if (!Res)
9168       continue;
9169     if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
9170       CGM.EmitGlobal(VD);
9171     } else {
9172       assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
9173              "Expected to or link clauses.");
9174       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
9175     }
9176   }
9177 }
9178 
9179 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9180     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9181   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9182          " Expected target-based directive.");
9183 }
9184 
/// If \p VD carries an OpenMP 'allocate' attribute with a predefined
/// allocator, report the language address space the variable should be
/// allocated in via \p AS and return true; return false otherwise.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    // All predefined allocators map to the default address space here.
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    // User-defined allocators cannot be used for statically allocated
    // variables.
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  // Unreachable: the switch above is exhaustive over the allocator enum.
  return false;
}
9208 
9209 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9210     CodeGenModule &CGM)
9211     : CGM(CGM) {
9212   if (CGM.getLangOpts().OpenMPIsDevice) {
9213     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9214     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9215   }
9216 }
9217 
9218 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9219   if (CGM.getLangOpts().OpenMPIsDevice)
9220     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9221 }
9222 
/// Track emission of target functions on the device.
/// NOTE(review): the return value appears to mean "already emitted / no
/// further emission needed" — confirm against callers outside this file.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or marking temporarily disabled via
  // DisableAutoDeclareTargetRAII: nothing to track.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      // If an IR function with this name exists, treat it as emitted only when
      // it has a definition (is not a mere declaration).
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Not declare target: record the name. insert().second is true only on the
  // first insertion, so repeated requests for the same name return true.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
9242 
/// Emit the offload entries metadata and return the function that registers
/// the offloading binary descriptor for this compilation unit.
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}
9253 
9254 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9255                                     const OMPExecutableDirective &D,
9256                                     SourceLocation Loc,
9257                                     llvm::Function *OutlinedFn,
9258                                     ArrayRef<llvm::Value *> CapturedVars) {
9259   if (!CGF.HaveInsertPoint())
9260     return;
9261 
9262   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9263   CodeGenFunction::RunCleanupsScope Scope(CGF);
9264 
9265   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9266   llvm::Value *Args[] = {
9267       RTLoc,
9268       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9269       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9270   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9271   RealArgs.append(std::begin(Args), std::end(Args));
9272   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9273 
9274   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9275   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9276 }
9277 
9278 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9279                                          const Expr *NumTeams,
9280                                          const Expr *ThreadLimit,
9281                                          SourceLocation Loc) {
9282   if (!CGF.HaveInsertPoint())
9283     return;
9284 
9285   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9286 
9287   llvm::Value *NumTeamsVal =
9288       NumTeams
9289           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9290                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9291           : CGF.Builder.getInt32(0);
9292 
9293   llvm::Value *ThreadLimitVal =
9294       ThreadLimit
9295           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9296                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9297           : CGF.Builder.getInt32(0);
9298 
9299   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9300   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9301                                      ThreadLimitVal};
9302   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9303                       PushNumTeamsArgs);
9304 }
9305 
/// Emit the runtime calls bracketing an 'omp target data' region:
/// __tgt_target_data_begin before the region body and __tgt_target_data_end
/// after it, honoring the 'if' and 'device' clauses. When device pointers
/// must be privatized (Info.CaptureDeviceAddrMap is non-empty) the body is
/// emitted twice: with privatization inside the 'then' branch and without it
/// in the 'else' branch of the 'if' clause conditional.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Emit the data-begin call, guarded by the 'if' clause when present.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Emit the matching data-end call under the same 'if' guard.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
9432 
/// Emit a standalone target data directive ('target enter data',
/// 'target exit data', or 'target update') as a single call to the matching
/// __tgt_target_data_* runtime entry point, honoring the 'if', 'device',
/// 'nowait', and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  // InputInfo and MapTypesArray are captured by reference; they are filled in
  // later by TargetThenGen before this lambda runs.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds cannot reach here (see the assert above);
    // they are listed explicitly so the switch stays exhaustive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Set up the offloading arrays from the map clauses, then emit ThenGen
  // either as part of a task (when 'depend' clauses are present) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An 'if' clause that evaluates to false means the directive is a no-op.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
9585 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// OpenMP classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Linear step (Linear) or variable-stride operand (LinearWithVarStride);
    /// printed into the mangled vector-variant names.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause; zero when absent.
    llvm::APSInt Alignment;
  };
} // namespace
9596 
9597 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9598                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9599   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9600   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9601   // of that clause. The VLEN value must be power of 2.
9602   // In other case the notion of the function`s "characteristic data type" (CDT)
9603   // is used to compute the vector length.
9604   // CDT is defined in the following order:
9605   //   a) For non-void function, the CDT is the return type.
9606   //   b) If the function has any non-uniform, non-linear parameters, then the
9607   //   CDT is the type of the first such parameter.
9608   //   c) If the CDT determined by a) or b) above is struct, union, or class
9609   //   type which is pass-by-value (except for the type that maps to the
9610   //   built-in complex data type), the characteristic data type is int.
9611   //   d) If none of the above three cases is applicable, the CDT is int.
9612   // The VLEN is then determined based on the CDT and the size of vector
9613   // register of that ISA for which current vector version is generated. The
9614   // VLEN is computed using the formula below:
9615   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
9616   // where vector register size specified in section 3.2.1 Registers and the
9617   // Stack Frame of original AMD64 ABI document.
9618   QualType RetType = FD->getReturnType();
9619   if (RetType.isNull())
9620     return 0;
9621   ASTContext &C = FD->getASTContext();
9622   QualType CDT;
9623   if (!RetType.isNull() && !RetType->isVoidType()) {
9624     CDT = RetType;
9625   } else {
9626     unsigned Offset = 0;
9627     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9628       if (ParamAttrs[Offset].Kind == Vector)
9629         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9630       ++Offset;
9631     }
9632     if (CDT.isNull()) {
9633       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9634         if (ParamAttrs[I + Offset].Kind == Vector) {
9635           CDT = FD->getParamDecl(I)->getType();
9636           break;
9637         }
9638       }
9639     }
9640   }
9641   if (CDT.isNull())
9642     CDT = C.IntTy;
9643   CDT = CDT->getCanonicalTypeUnqualified();
9644   if (CDT->isRecordType() || CDT->isUnionType())
9645     CDT = C.IntTy;
9646   return C.getTypeSize(CDT);
9647 }
9648 
9649 static void
9650 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9651                            const llvm::APSInt &VLENVal,
9652                            ArrayRef<ParamAttrTy> ParamAttrs,
9653                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9654   struct ISADataTy {
9655     char ISA;
9656     unsigned VecRegSize;
9657   };
9658   ISADataTy ISAData[] = {
9659       {
9660           'b', 128
9661       }, // SSE
9662       {
9663           'c', 256
9664       }, // AVX
9665       {
9666           'd', 256
9667       }, // AVX2
9668       {
9669           'e', 512
9670       }, // AVX512
9671   };
9672   llvm::SmallVector<char, 2> Masked;
9673   switch (State) {
9674   case OMPDeclareSimdDeclAttr::BS_Undefined:
9675     Masked.push_back('N');
9676     Masked.push_back('M');
9677     break;
9678   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9679     Masked.push_back('N');
9680     break;
9681   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9682     Masked.push_back('M');
9683     break;
9684   }
9685   for (char Mask : Masked) {
9686     for (const ISADataTy &Data : ISAData) {
9687       SmallString<256> Buffer;
9688       llvm::raw_svector_ostream Out(Buffer);
9689       Out << "_ZGV" << Data.ISA << Mask;
9690       if (!VLENVal) {
9691         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
9692                                          evaluateCDTSize(FD, ParamAttrs));
9693       } else {
9694         Out << VLENVal;
9695       }
9696       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9697         switch (ParamAttr.Kind){
9698         case LinearWithVarStride:
9699           Out << 's' << ParamAttr.StrideOrArg;
9700           break;
9701         case Linear:
9702           Out << 'l';
9703           if (!!ParamAttr.StrideOrArg)
9704             Out << ParamAttr.StrideOrArg;
9705           break;
9706         case Uniform:
9707           Out << 'u';
9708           break;
9709         case Vector:
9710           Out << 'v';
9711           break;
9712         }
9713         if (!!ParamAttr.Alignment)
9714           Out << 'a' << ParamAttr.Alignment;
9715       }
9716       Out << '_' << Fn->getName();
9717       Fn->addFnAttr(Out.str());
9718     }
9719   }
9720 }
9721 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9727 
9728 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9729 ///
9730 /// TODO: Need to implement the behavior for reference marked with a
9731 /// var or no linear modifiers (1.b in the section). For this, we
9732 /// need to extend ParamKindTy to support the linear modifiers.
9733 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9734   QT = QT.getCanonicalType();
9735 
9736   if (QT->isVoidType())
9737     return false;
9738 
9739   if (Kind == ParamKindTy::Uniform)
9740     return false;
9741 
9742   if (Kind == ParamKindTy::Linear)
9743     return false;
9744 
9745   // TODO: Handle linear references with modifiers
9746 
9747   if (Kind == ParamKindTy::LinearWithVarStride)
9748     return false;
9749 
9750   return true;
9751 }
9752 
9753 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9754 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9755   QT = QT.getCanonicalType();
9756   unsigned Size = C.getTypeSize(QT);
9757 
9758   // Only scalars and complex within 16 bytes wide set PVB to true.
9759   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9760     return false;
9761 
9762   if (QT->isFloatingType())
9763     return true;
9764 
9765   if (QT->isIntegerType())
9766     return true;
9767 
9768   if (QT->isPointerType())
9769     return true;
9770 
9771   // TODO: Add support for complex types (section 3.1.2, item 2).
9772 
9773   return false;
9774 }
9775 
9776 /// Computes the lane size (LS) of a return type or of an input parameter,
9777 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9778 /// TODO: Add support for references, section 3.2.1, item 1.
9779 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9780   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9781     QualType PTy = QT.getCanonicalType()->getPointeeType();
9782     if (getAArch64PBV(PTy, C))
9783       return C.getTypeSize(PTy);
9784   }
9785   if (getAArch64PBV(QT, C))
9786     return C.getTypeSize(QT);
9787 
9788   return C.getTypeSize(C.getUIntPtrType());
9789 }
9790 
9791 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9792 // signature of the scalar function, as defined in 3.2.2 of the
9793 // AAVFABI.
9794 static std::tuple<unsigned, unsigned, bool>
9795 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9796   QualType RetType = FD->getReturnType().getCanonicalType();
9797 
9798   ASTContext &C = FD->getASTContext();
9799 
9800   bool OutputBecomesInput = false;
9801 
9802   llvm::SmallVector<unsigned, 8> Sizes;
9803   if (!RetType->isVoidType()) {
9804     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9805     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
9806       OutputBecomesInput = true;
9807   }
9808   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9809     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
9810     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
9811   }
9812 
9813   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
9814   // The LS of a function parameter / return value can only be a power
9815   // of 2, starting from 8 bits, up to 128.
9816   assert(std::all_of(Sizes.begin(), Sizes.end(),
9817                      [](unsigned Size) {
9818                        return Size == 8 || Size == 16 || Size == 32 ||
9819                               Size == 64 || Size == 128;
9820                      }) &&
9821          "Invalid size");
9822 
9823   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
9824                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
9825                          OutputBecomesInput);
9826 }
9827 
/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    // One token per parameter: 's<pos>' (variable-stride linear),
    // 'l[<step>]' (linear), 'u' (uniform), or 'v' (vector).
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    // Append the 'aligned' clause value, when present.
    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return Out.str();
}
9860 
9861 // Function used to add the attribute. The parameter `VLEN` is
9862 // templated to allow the use of "x" when targeting scalable functions
9863 // for SVE.
9864 template <typename T>
9865 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
9866                                  char ISA, StringRef ParSeq,
9867                                  StringRef MangledName, bool OutputBecomesInput,
9868                                  llvm::Function *Fn) {
9869   SmallString<256> Buffer;
9870   llvm::raw_svector_ostream Out(Buffer);
9871   Out << Prefix << ISA << LMask << VLEN;
9872   if (OutputBecomesInput)
9873     Out << "v";
9874   Out << ParSeq << "_" << MangledName;
9875   Fn->addFnAttr(Out.str());
9876 }
9877 
9878 // Helper function to generate the Advanced SIMD names depending on
9879 // the value of the NDS when simdlen is not present.
9880 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
9881                                       StringRef Prefix, char ISA,
9882                                       StringRef ParSeq, StringRef MangledName,
9883                                       bool OutputBecomesInput,
9884                                       llvm::Function *Fn) {
9885   switch (NDS) {
9886   case 8:
9887     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9888                          OutputBecomesInput, Fn);
9889     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
9890                          OutputBecomesInput, Fn);
9891     break;
9892   case 16:
9893     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9894                          OutputBecomesInput, Fn);
9895     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9896                          OutputBecomesInput, Fn);
9897     break;
9898   case 32:
9899     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9900                          OutputBecomesInput, Fn);
9901     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9902                          OutputBecomesInput, Fn);
9903     break;
9904   case 64:
9905   case 128:
9906     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9907                          OutputBecomesInput, Fn);
9908     break;
9909   default:
9910     llvm_unreachable("Scalar type is too wide.");
9911   }
9912 }
9913 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p ISA is 'n' for Advanced SIMD or 's' for SVE, and \p UserVLEN is the
/// value of the 'simdlen' clause (0 when the clause is absent).
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: vector length is "x" (scalable).
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10022 
10023 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10024                                               llvm::Function *Fn) {
10025   ASTContext &C = CGM.getContext();
10026   FD = FD->getMostRecentDecl();
10027   // Map params to their positions in function decl.
10028   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10029   if (isa<CXXMethodDecl>(FD))
10030     ParamPositions.try_emplace(FD, 0);
10031   unsigned ParamPos = ParamPositions.size();
10032   for (const ParmVarDecl *P : FD->parameters()) {
10033     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10034     ++ParamPos;
10035   }
10036   while (FD) {
10037     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10038       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10039       // Mark uniform parameters.
10040       for (const Expr *E : Attr->uniforms()) {
10041         E = E->IgnoreParenImpCasts();
10042         unsigned Pos;
10043         if (isa<CXXThisExpr>(E)) {
10044           Pos = ParamPositions[FD];
10045         } else {
10046           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10047                                 ->getCanonicalDecl();
10048           Pos = ParamPositions[PVD];
10049         }
10050         ParamAttrs[Pos].Kind = Uniform;
10051       }
10052       // Get alignment info.
10053       auto NI = Attr->alignments_begin();
10054       for (const Expr *E : Attr->aligneds()) {
10055         E = E->IgnoreParenImpCasts();
10056         unsigned Pos;
10057         QualType ParmTy;
10058         if (isa<CXXThisExpr>(E)) {
10059           Pos = ParamPositions[FD];
10060           ParmTy = E->getType();
10061         } else {
10062           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10063                                 ->getCanonicalDecl();
10064           Pos = ParamPositions[PVD];
10065           ParmTy = PVD->getType();
10066         }
10067         ParamAttrs[Pos].Alignment =
10068             (*NI)
10069                 ? (*NI)->EvaluateKnownConstInt(C)
10070                 : llvm::APSInt::getUnsigned(
10071                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10072                           .getQuantity());
10073         ++NI;
10074       }
10075       // Mark linear parameters.
10076       auto SI = Attr->steps_begin();
10077       auto MI = Attr->modifiers_begin();
10078       for (const Expr *E : Attr->linears()) {
10079         E = E->IgnoreParenImpCasts();
10080         unsigned Pos;
10081         if (isa<CXXThisExpr>(E)) {
10082           Pos = ParamPositions[FD];
10083         } else {
10084           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10085                                 ->getCanonicalDecl();
10086           Pos = ParamPositions[PVD];
10087         }
10088         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10089         ParamAttr.Kind = Linear;
10090         if (*SI) {
10091           Expr::EvalResult Result;
10092           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10093             if (const auto *DRE =
10094                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10095               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10096                 ParamAttr.Kind = LinearWithVarStride;
10097                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10098                     ParamPositions[StridePVD->getCanonicalDecl()]);
10099               }
10100             }
10101           } else {
10102             ParamAttr.StrideOrArg = Result.Val.getInt();
10103           }
10104         }
10105         ++SI;
10106         ++MI;
10107       }
10108       llvm::APSInt VLENVal;
10109       SourceLocation ExprLoc;
10110       const Expr *VLENExpr = Attr->getSimdlen();
10111       if (VLENExpr) {
10112         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10113         ExprLoc = VLENExpr->getExprLoc();
10114       }
10115       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10116       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10117           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10118         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10119       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10120         unsigned VLEN = VLENVal.getExtValue();
10121         StringRef MangledName = Fn->getName();
10122         if (CGM.getTarget().hasFeature("sve"))
10123           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10124                                          MangledName, 's', 128, Fn, ExprLoc);
10125         if (CGM.getTarget().hasFeature("neon"))
10126           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10127                                          MangledName, 'n', 128, Fn, ExprLoc);
10128       }
10129     }
10130     FD = FD->getPreviousDecl();
10131   }
10132 }
10133 
10134 namespace {
10135 /// Cleanup action for doacross support.
10136 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10137 public:
10138   static const int DoacrossFinArgs = 2;
10139 
10140 private:
10141   llvm::FunctionCallee RTLFn;
10142   llvm::Value *Args[DoacrossFinArgs];
10143 
10144 public:
10145   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10146                     ArrayRef<llvm::Value *> CallArgs)
10147       : RTLFn(RTLFn) {
10148     assert(CallArgs.size() == DoacrossFinArgs);
10149     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10150   }
10151   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10152     if (!CGF.HaveInsertPoint())
10153       return;
10154     CGF.EmitRuntimeCall(RTLFn, Args);
10155   }
10156 };
10157 } // namespace
10158 
/// Emits initialization of doacross loop support.
///
/// Builds an on-stack array of 'kmp_dim' descriptors, one per entry in
/// \p NumIterations, stores each loop's iteration count (converted to
/// kmp_int64) as the upper bound and 1 as the stride, then calls
/// __kmpc_doacross_init. A cleanup calling __kmpc_doacross_fini is pushed so
/// finalization runs on both normal and exceptional exits.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The record type was already built on a previous call; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  // Zero-initialize the whole array; the 'lo' field of each dimension is
  // deliberately left at 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Finalization must run on every exit path, so push an EH-aware cleanup
  // that emits the __kmpc_doacross_fini call.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10230 
/// Emits a doacross ordered synchronization point for depend clause \p C.
///
/// Evaluates each loop-iteration value from the clause into an on-stack
/// kmp_int64 array, then calls __kmpc_doacross_post for 'depend(source)' or
/// __kmpc_doacross_wait for 'depend(sink)'.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1)
;
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Convert the per-loop counter value to kmp_int64 and store it into the
    // array slot for this loop dimension.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
10261 
10262 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10263                                llvm::FunctionCallee Callee,
10264                                ArrayRef<llvm::Value *> Args) const {
10265   assert(Loc.isValid() && "Outlined function call location must be valid.");
10266   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10267 
10268   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10269     if (Fn->doesNotThrow()) {
10270       CGF.EmitNounwindRuntimeCall(Fn, Args);
10271       return;
10272     }
10273   }
10274   CGF.EmitRuntimeCall(Callee, Args);
10275 }
10276 
/// Emits a call to the outlined function \p OutlinedFn with \p Args.
///
/// The base runtime simply forwards to emitCall, which attaches an artificial
/// debug location for \p Loc and selects the nounwind call form when possible.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10282 
/// Returns the address of the local variable backing \p NativeParam.
///
/// The base runtime performs no native-to-target parameter translation, so
/// \p TargetParam is unused here (CGOpenMPSIMDRuntime provides its own
/// version of this method).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10288 
10289 namespace {
10290 /// Cleanup action for allocate support.
10291 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10292 public:
10293   static const int CleanupArgs = 3;
10294 
10295 private:
10296   llvm::FunctionCallee RTLFn;
10297   llvm::Value *Args[CleanupArgs];
10298 
10299 public:
10300   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10301                        ArrayRef<llvm::Value *> CallArgs)
10302       : RTLFn(RTLFn) {
10303     assert(CallArgs.size() == CleanupArgs &&
10304            "Size of arguments does not match.");
10305     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10306   }
10307   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10308     if (!CGF.HaveInsertPoint())
10309       return;
10310     CGF.EmitRuntimeCall(RTLFn, Args);
10311   }
10312 };
10313 } // namespace
10314 
/// Returns runtime-managed storage for \p VD when it carries an OpenMP
/// 'allocate' attribute naming a non-default allocator.
///
/// Returns Address::invalid() — i.e. "use the normal stack allocation" — when
/// \p VD is null, has no OMPAllocateDeclAttr, or uses the default allocator.
/// Otherwise emits a __kmpc_alloc call with an alignment-rounded size and
/// pushes a cleanup that releases the memory via __kmpc_free.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified type: the size is a runtime value.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round the static size up to the alignment.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Register the matching __kmpc_free so the storage is released on both
  // normal and exceptional scope exits.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the runtime's untyped result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
10368 
// CGOpenMPSIMDRuntime stubs: outlining of parallel/teams/task regions is not
// supported in simd-only mode, so these entry points abort codegen.

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10396 
// CGOpenMPSIMDRuntime stubs: synchronization region constructs (critical,
// master, taskyield, taskgroup, single, ordered, barrier) are not supported
// in simd-only mode.

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10443 
// CGOpenMPSIMDRuntime stubs: worksharing-loop scheduling (dispatch/static
// init, ordered iteration end, static finish, next-chunk query) is not
// supported in simd-only mode.

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10483 
// CGOpenMPSIMDRuntime stubs: num_threads/proc_bind clauses, threadprivate
// variables, flush, and task(loop) calls are not supported in simd-only mode.

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10535 
/// Emits reduction code in simd-only mode.
///
/// Only simple reductions are expected here (asserted below), so the
/// base-class implementation is reused unchanged.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
10544 
// CGOpenMPSIMDRuntime stubs: task reductions, taskwait, cancellation, and
// target-region outlining/invocation are not supported in simd-only mode.

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10605 
// Always returns false in simd-only mode: no global is claimed for
// target-region handling here (presumably leaving emission to the regular
// path — confirm against the caller's contract for the return value).
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
10609 
// No offloading registration function is produced in simd-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
10613 
// CGOpenMPSIMDRuntime stubs: teams constructs, target data mapping, doacross
// loops, and parameter translation are not supported in simd-only mode.

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10664